Benchmark of onnxruntime on LogisticRegression

This example relies on pymlbenchmark, in particular on the class OnnxRuntimeBenchPerfTestBinaryClassification, which defines a side-by-side benchmark comparing the prediction functions of scikit-learn, onnxruntime and a simple numpy implementation.

Benchmark function

from time import perf_counter as time
import numpy
import pandas
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LogisticRegression
try:
    from sklearn.utils._testing import ignore_warnings
except ImportError:
    from sklearn.utils.testing import ignore_warnings
from scipy.special import expit
from pymlbenchmark.context import machine_information
from pymlbenchmark.benchmark import BenchPerf
from pymlbenchmark.external import OnnxRuntimeBenchPerfTestBinaryClassification
from pymlbenchmark.plotting import plot_bench_results


class OnnxRuntimeBenchPerfTestBinaryClassification3(
        OnnxRuntimeBenchPerfTestBinaryClassification):
    """
    Overrides the parent benchmark to add a numpy-based
    implementation of the logistic regression predictions.
    """

    def fcts(self, dim=None, **kwargs):
        """
        Returns the benchmarked functions: everything the parent class
        returns for the same arguments, followed by two entries tagged
        ``lib='py'`` (one for *predict*, one for *predict_proba*).
        """

        # Both closures capture the fitted model as a default argument,
        # so ``self.skl`` is read once, when ``fcts`` is called.
        def predict_py_predict(X, model=self.skl):
            weights = model.coef_
            bias = model.intercept_
            scores = numpy.dot(X, weights.T) + bias
            # Non-negative score -> 1, negative score -> 0.
            return (scores >= 0).astype(numpy.int32)

        def predict_py_predict_proba(X, model=self.skl):
            weights = model.coef_
            bias = model.intercept_
            scores = numpy.dot(X, weights.T) + bias
            # Columns are [-score, score]; the sigmoid is applied to both.
            return expit(numpy.c_[-scores, scores])

        py_entries = [
            {'method': 'predict', 'lib': 'py', 'fct': predict_py_predict},
            {'method': 'predict_proba', 'lib': 'py',
             'fct': predict_py_predict_proba},
        ]

        benchmarked = super().fcts(dim=dim, **kwargs)
        benchmarked.extend(py_entries)
        return benchmarked


@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):
    """
    Runs the benchmark over a grid of parameters and returns the
    collected results as a :class:`pandas.DataFrame`.

    :param repeat: forwarded to ``BenchPerf.enumerate_run_benchs``
    :param verbose: forwarded to ``BenchPerf.enumerate_run_benchs``
    :return: one row per result yielded by the benchmark

    The elapsed time of the whole run is printed on standard output.
    """

    def make_test(dim=None, **opts):
        # Factory handed to BenchPerf: builds one benchmark instance
        # around LogisticRegression for a given configuration.
        return OnnxRuntimeBenchPerfTestBinaryClassification3(
            LogisticRegression, dim=dim, **opts)

    grid_before = {
        'dim': [1, 5, 10, 20, 50, 100, 150],
        'fit_intercept': [True, False],
    }
    grid_after = {'N': [1, 10, 100, 1000, 10000]}
    bench = BenchPerf(grid_before, grid_after, make_test)

    # The whole run executes with scikit-learn's assume_finite=True.
    with sklearn.config_context(assume_finite=True):
        begin = time()
        rows = list(
            bench.enumerate_run_benchs(repeat=repeat, verbose=verbose))
        finish = time()

    results_df = pandas.DataFrame(rows)
    print("Total time = %0.3f sec\n" % (finish - begin))
    return results_df

Run the benchmark

# Run the benchmark, save the raw results to CSV, then display the
# first four result rows transposed (one column per row).
df = run_bench(verbose=True)
df.to_csv(
    "bench_plot_onnxruntime_logistic_regression.perf.csv",
    index=False)
print(df.head(4).transpose())

Out:

  0%|          | 0/70 [00:00<?, ?it/s]
  1%|1         | 1/70 [00:03<04:08,  3.60s/it]
  3%|2         | 2/70 [00:04<02:22,  2.09s/it]
  4%|4         | 3/70 [00:09<03:31,  3.15s/it]
  6%|5         | 4/70 [00:46<18:24, 16.73s/it]
  7%|7         | 5/70 [00:47<11:59, 11.07s/it]
  9%|8         | 6/70 [00:48<08:02,  7.54s/it]
 10%|#         | 7/70 [00:49<05:40,  5.41s/it]
 11%|#1        | 8/70 [00:53<05:13,  5.06s/it]
 13%|#2        | 9/70 [01:30<15:19, 15.08s/it]
 14%|#4        | 10/70 [01:31<10:46, 10.77s/it]
 16%|#5        | 11/70 [01:32<07:33,  7.68s/it]
 17%|#7        | 12/70 [01:33<05:28,  5.66s/it]
 19%|#8        | 13/70 [01:37<05:00,  5.26s/it]
 20%|##        | 14/70 [02:15<14:00, 15.02s/it]
 21%|##1       | 15/70 [02:16<09:55, 10.82s/it]
 23%|##2       | 16/70 [02:17<06:59,  7.77s/it]
 24%|##4       | 17/70 [02:18<05:04,  5.75s/it]
 26%|##5       | 18/70 [02:22<04:37,  5.34s/it]
 27%|##7       | 19/70 [03:00<12:45, 15.02s/it]
 29%|##8       | 20/70 [03:01<09:01, 10.84s/it]
 30%|###       | 21/70 [03:02<06:21,  7.79s/it]
 31%|###1      | 22/70 [03:03<04:36,  5.77s/it]
 33%|###2      | 23/70 [03:07<04:11,  5.36s/it]
 34%|###4      | 24/70 [03:45<11:34, 15.10s/it]
 36%|###5      | 25/70 [03:46<08:11, 10.91s/it]
 37%|###7      | 26/70 [03:47<05:45,  7.85s/it]
 39%|###8      | 27/70 [03:48<04:09,  5.80s/it]
 40%|####      | 28/70 [03:52<03:46,  5.39s/it]
 41%|####1     | 29/70 [04:30<10:22, 15.17s/it]
 43%|####2     | 30/70 [04:32<07:21, 11.03s/it]
 44%|####4     | 31/70 [04:32<05:09,  7.93s/it]
 46%|####5     | 32/70 [04:33<03:42,  5.86s/it]
 47%|####7     | 33/70 [04:38<03:21,  5.46s/it]
 49%|####8     | 34/70 [05:16<09:15, 15.43s/it]
 50%|#####     | 35/70 [05:18<06:31, 11.20s/it]
 51%|#####1    | 36/70 [05:18<04:33,  8.05s/it]
 53%|#####2    | 37/70 [05:20<03:16,  5.95s/it]
 54%|#####4    | 38/70 [05:24<02:56,  5.51s/it]
 56%|#####5    | 39/70 [06:03<07:57, 15.41s/it]
 57%|#####7    | 40/70 [06:04<05:39, 11.31s/it]
 59%|#####8    | 41/70 [06:05<03:55,  8.13s/it]
 60%|######    | 42/70 [06:06<02:48,  6.01s/it]
 61%|######1   | 43/70 [06:11<02:31,  5.62s/it]
 63%|######2   | 44/70 [06:51<06:56, 16.01s/it]
 64%|######4   | 45/70 [06:53<04:54, 11.80s/it]
 66%|######5   | 46/70 [06:54<03:23,  8.47s/it]
 67%|######7   | 47/70 [06:55<02:25,  6.31s/it]
 69%|######8   | 48/70 [07:00<02:08,  5.86s/it]
 70%|#######   | 49/70 [07:40<05:39, 16.15s/it]
 71%|#######1  | 50/70 [07:42<03:57, 11.87s/it]
 73%|#######2  | 51/70 [07:43<02:41,  8.52s/it]
 74%|#######4  | 52/70 [07:44<01:53,  6.30s/it]
 76%|#######5  | 53/70 [07:49<01:41,  5.95s/it]
 77%|#######7  | 54/70 [08:32<04:32, 17.01s/it]
 79%|#######8  | 55/70 [08:34<03:09, 12.62s/it]
 80%|########  | 56/70 [08:35<02:06,  9.05s/it]
 81%|########1 | 57/70 [08:36<01:26,  6.67s/it]
 83%|########2 | 58/70 [08:41<01:14,  6.19s/it]
 84%|########4 | 59/70 [09:24<03:08, 17.18s/it]
 86%|########5 | 60/70 [09:27<02:09, 12.91s/it]
 87%|########7 | 61/70 [09:27<01:23,  9.25s/it]
 89%|########8 | 62/70 [09:29<00:54,  6.82s/it]
 90%|######### | 63/70 [09:34<00:44,  6.40s/it]
 91%|#########1| 64/70 [10:19<01:47, 17.91s/it]
 93%|#########2| 65/70 [10:22<01:06, 13.38s/it]
 94%|#########4| 66/70 [10:22<00:38,  9.58s/it]
 96%|#########5| 67/70 [10:23<00:21,  7.05s/it]
 97%|#########7| 68/70 [10:29<00:13,  6.55s/it]
 99%|#########8| 69/70 [11:14<00:18, 18.01s/it]
 99%|#########8| 69/70 [11:14<00:09,  9.77s/it]
Total time = 674.247 sec

                               0  ...              3
method                   predict  ...  predict_proba
lib                          skl  ...            skl
skl_nb_base_estimators       1.0  ...            1.0
N                              1  ...              1
dim                            1  ...              1
fit_intercept               True  ...           True
repeat                       100  ...            100
number                         1  ...              1
min                      0.00018  ...       0.000253
max                     0.000382  ...       0.000573
min3                     0.00018  ...       0.000254
max3                    0.000219  ...       0.000429
mean                    0.000187  ...       0.000274
lower                    0.00018  ...       0.000253
upper                   0.000229  ...       0.000362
count                        100  ...            100
median                  0.000182  ...        0.00026
error_c                        0  ...              0
onnx_nodes                   NaN  ...            NaN
onnx_opset                   NaN  ...            NaN
ort_size                     NaN  ...            NaN

[21 rows x 4 columns]

Extract information about the machine used

# Packages passed to machine_information().
pkgs = [
    'numpy',
    'pandas',
    'sklearn',
    'skl2onnx',
    'onnxruntime',
    'onnx',
    'mlprodict',
]
# Store the machine description next to the benchmark results.
dfi = pandas.DataFrame(machine_information(pkgs))
dfi.to_csv(
    "bench_plot_onnxruntime_logistic_regression.time.csv",
    index=False)
print(dfi)

Out:

                         name  ...                                              value
0                        date  ...                                                NaN
1                      python  ...  3.9.1 (default, Jan 18 2021, 16:35:58) \n[GCC ...
2                    platform  ...                                              linux
3                          OS  ...        Linux-4.19.0-13-amd64-x86_64-with-glibc2.28
4                     machine  ...                                             x86_64
5                   processor  ...
6                     release  ...                                    4.19.0-13-amd64
7                architecture  ...                                       (64bit, ELF)
8                        arch  ...                                             X86_64
9                   brand_raw  ...            Intel(R) Atom(TM) CPU  C2750  @ 2.40GHz
10                      count  ...                                                  8
11                      flags  ...  3dnowprefetch acpi aes aperfmperf apic arat ar...
12              hz_advertised  ...                                    [2400000000, 0]
13         l1_data_cache_size  ...                                              24576
14  l1_instruction_cache_size  ...                                              32768
15     l2_cache_associativity  ...                                                  8
16         l2_cache_line_size  ...                                               1024
17              l2_cache_size  ...                                            1048576
18              l3_cache_size  ...                                            1048576
19                   stepping  ...                                                  8
20                  mlprodict  ...                                                NaN
21                      numpy  ...                               openblas, language=c
22                       onnx  ...                                           opset=15
23                onnxruntime  ...                                                CPU
24                     pandas  ...                                                NaN
25                   skl2onnx  ...                                                NaN
26                    sklearn  ...                                                NaN

[27 rows x 3 columns]

Plot the results

def label_fct(la):
    """
    Shortens a curve label by applying a fixed list of substring
    substitutions and returns the shortened label.
    """
    # Applied in order: "onxpython_compiled" must come before its
    # prefix "onxpython", otherwise "_compiled" would be left behind.
    substitutions = (
        ("onxpython_compiled", "opy"),
        ("onxpython", "opy"),
        ("onxonnxruntime1", "ort"),
        ("fit_intercept", "fi"),
        ("True", "1"),
        ("False", "0"),
    )
    for old, new in substitutions:
        la = la.replace(old, new)
    return la


def color_fct(la, col):
    """
    Chooses the color of a curve from its label.

    :param la: curve label (a string)
    :param col: color to use when the label needs no special color
    :return: ``"red"`` if the label contains ``"onxpython"``,
        otherwise *col* unchanged
    """
    # A label containing "onxpython_compiled" also contains "onxpython",
    # so a single substring test covers both variants.
    return "red" if "onxpython" in la else col


# Plot the benchmark results held in df; curve labels and colors are
# post-processed by label_fct and color_fct.
plot_bench_results(
    df,
    x_value='dim',
    row_cols=['N', 'fit_intercept'],
    col_cols='method',
    label_fct=label_fct,
    color_fct=color_fct,
    title="LogisticRegression\nBenchmark scikit-learn / onnxruntime",
)
plt.show()
LogisticRegression Benchmark scikit-learn / onnxruntime, method=predict, method=predict_proba

Total running time of the script: ( 11 minutes 42.139 seconds)

Gallery generated by Sphinx-Gallery