Investigate a failure from a benchmark¶
The method validate may raise an exception; when it does, the class BenchPerfTest dumps everything needed to replicate and investigate the failure (see the call to dump_error below).
The following script shows how to investigate such a failure.
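As a minimal sketch of that pattern before the full example (the consistency check below is a placeholder; only validate and dump_error come from the API used later in this script), an overridden test looks like this:
from pymlbenchmark.external import OnnxRuntimeBenchPerfTestBinaryClassification


class FailingBenchPerfTest(OnnxRuntimeBenchPerfTestBinaryClassification):
    def validate(self, results, **kwargs):
        # Placeholder consistency check, hypothetical for this sketch.
        consistent = results is not None
        if not consistent:
            # kwargs contains the input data given to the benchmarked functions.
            self.dump_error("inconsistent results", results=results, **kwargs)
            raise AssertionError("inconsistent results")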
from onnxruntime import InferenceSession
from pickle import load
from time import time
import numpy
from numpy.testing import assert_almost_equal
import matplotlib.pyplot as plt
import pandas
from scipy.special import expit
import sklearn
from sklearn.utils._testing import ignore_warnings
from sklearn.linear_model import LogisticRegression
from pymlbenchmark.benchmark import BenchPerf
from pymlbenchmark.external import OnnxRuntimeBenchPerfTestBinaryClassification
Defines the benchmark and runs it¶
class OnnxRuntimeBenchPerfTestBinaryClassification3(
        OnnxRuntimeBenchPerfTestBinaryClassification):
    """
    Overwrites the class to add a pure python implementation
    of the logistic regression.
    """

    def fcts(self, dim=None, **kwargs):

        def predict_py_predict(X, model=self.skl):
            coef = model.coef_
            intercept = model.intercept_
            pred = numpy.dot(X, coef.T) + intercept
            return (pred >= 0).astype(numpy.int32)

        def predict_py_predict_proba(X, model=self.skl):
            coef = model.coef_
            intercept = model.intercept_
            pred = numpy.dot(X, coef.T) + intercept
            decision_2d = numpy.c_[-pred, pred]
            return expit(decision_2d)

        res = OnnxRuntimeBenchPerfTestBinaryClassification.fcts(
            self, dim=dim, **kwargs)
        res.extend([
            {'method': 'predict', 'lib': 'py', 'fct': predict_py_predict},
            {'method': 'predict_proba', 'lib': 'py',
             'fct': predict_py_predict_proba},
        ])
        return res

    def validate(self, results, **kwargs):
        """
        Raises an exception and locally dumps everything we need
        to investigate.
        """
        # Checks that methods *predict* and *predict_proba* return
        # the same results for both scikit-learn and onnxruntime.
        OnnxRuntimeBenchPerfTestBinaryClassification.validate(
            self, results, **kwargs)

        # Let's dump anything we need for later.
        # kwargs contains the input data.
        self.dump_error("Just for fun", skl=self.skl,
                        ort_onnx=self.ort_onnx,
                        results=results, **kwargs)
        raise AssertionError("Just for fun")
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=10, verbose=False):
    pbefore = dict(dim=[1, 5], fit_intercept=[True])
    pafter = dict(N=[1, 10, 100])
    test = lambda dim=None, **opts: (
        OnnxRuntimeBenchPerfTestBinaryClassification3(
            LogisticRegression, dim=dim, **opts))
    bp = BenchPerf(pbefore, pafter, test)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(bp.enumerate_run_benchs(repeat=repeat, verbose=verbose))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df
Runs the benchmark.
try:
    run_bench(verbose=True)
except AssertionError as e:
    print(e)
Out:
0%| | 0/6 [00:00<?, ?it/s]Just for fun
0%| | 0/6 [00:00<?, ?it/s]
Investigation¶
Let’s retrieve what was dumped.
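The dump produced by dump_error is a pickle file written in the current directory. A small sketch, using glob, to locate such files (the BENCH-ERROR prefix matches the filename used just below and is assumed here to be dump_error's naming convention):
import glob

# Dump files written by dump_error in the working directory.
for name in sorted(glob.glob("BENCH-ERROR-*.pkl")):
    print(name)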
filename = "BENCH-ERROR-OnnxRuntimeBenchPerfTestBinaryClassification3-0.pkl"
try:
    with open(filename, "rb") as f:
        data = load(f)
    good = True
except Exception as e:
    print(e)
    good = False

if good:
    print(list(sorted(data)))
    print("msg:", data["msg"])
    print(list(sorted(data["data"])))
    print(data["data"]['skl'])
Out:
['data', 'msg']
msg: Just for fun
['data', 'ort_onnx', 'results', 'skl']
LogisticRegression()
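The dump also contains the fitted scikit-learn model itself, so its learned parameters can be checked directly (coef_ and intercept_ are the standard LogisticRegression attributes already used in the pure python implementation above):
if good:
    model = data["data"]["skl"]
    # Parameters learned by the dumped LogisticRegression
    print("coef_:", model.coef_)
    print("intercept_:", model.intercept_)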
The input data is the following:
if good:
    print(data['data']['data'])
Out:
[(array([[0.12337018]], dtype=float32),), (array([[0.9729736]], dtype=float32),), (array([[0.13900226]], dtype=float32),), (array([[0.5781039]], dtype=float32),), (array([[0.80378735]], dtype=float32),), (array([[0.15914895]], dtype=float32),), (array([[0.13325912]], dtype=float32),), (array([[0.5843828]], dtype=float32),), (array([[0.39938942]], dtype=float32),), (array([[0.2392732]], dtype=float32),)]
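Every entry is a one-element tuple holding a float32 batch. As a sketch, the batches can be stacked into a single matrix, which makes a vectorized comparison easier:
if good:
    # Stack every batch into one (n_observations, n_features) matrix
    X = numpy.vstack([inp[0] for inp in data["data"]["data"]])
    print(X.shape, X.dtype)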
Let’s compare predictions.
if good:
    model_skl = data["data"]['skl']
    model_onnx = InferenceSession(data["data"]['ort_onnx'].SerializeToString())
    input_name = model_onnx.get_inputs()[0].name


def ort_predict_proba(sess, input, input_name):
    # The second output of the converted model holds the probabilities.
    res = sess.run(None, {input_name: input.astype(numpy.float32)})[1]
    return pandas.DataFrame(res).values


if good:
    pred_skl = [model_skl.predict_proba(input[0])
                for input in data['data']['data']]
    pred_onnx = [ort_predict_proba(model_onnx, input[0], input_name)
                 for input in data['data']['data']]
    print(pred_skl)
    print(pred_onnx)
Out:
[array([[0.50065085, 0.49934915]]), array([[0.50180121, 0.49819879]]), array([[0.50067202, 0.49932798]]), array([[0.50126656, 0.49873344]]), array([[0.50157214, 0.49842786]]), array([[0.5006993, 0.4993007]]), array([[0.50066424, 0.49933576]]), array([[0.50127506, 0.49872494]]), array([[0.50102458, 0.49897542]]), array([[0.50080779, 0.49919221]])]
[array([[0.50065082, 0.49934915]]), array([[0.50180119, 0.49819878]]), array([[0.50067204, 0.49932799]]), array([[0.50126654, 0.49873343]]), array([[0.50157213, 0.49842787]]), array([[0.50069928, 0.49930069]]), array([[0.50066423, 0.49933577]]), array([[0.50127506, 0.49872494]]), array([[0.5010246 , 0.49897543]]), array([[0.50080776, 0.49919221]])]
They look the same. Let’s check…
if good:
    for a, b in zip(pred_skl, pred_onnx):
        assert_almost_equal(a, b)
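assert_almost_equal compares 7 decimal places by default. To make the tolerance explicit, a sketch using numpy.testing.assert_allclose with an absolute tolerance only:
if good:
    from numpy.testing import assert_allclose
    for a, b in zip(pred_skl, pred_onnx):
        # Fails if any absolute difference exceeds 1e-6
        assert_allclose(a, b, rtol=0, atol=1e-6)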
Computing differences.
if good:
    def diff(a, b):
        return numpy.max(numpy.abs(a.ravel() - b.ravel()))

    diffs = list(sorted(diff(a, b) for a, b in zip(pred_skl, pred_onnx)))

    plt.plot(diffs)
    plt.title(
        "Differences between prediction with\nscikit-learn and onnxruntime"
        "\nfor Logistic Regression")
    plt.show()
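Alongside the plot, a short numeric summary of the same differences (a sketch reusing the sorted diffs list computed above):
if good:
    # Smallest and largest discrepancy between scikit-learn and onnxruntime
    print("min difference:", diffs[0])
    print("max difference:", diffs[-1])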

Total running time of the script: ( 0 minutes 0.645 seconds)