Compares numpy to onnxruntime on simple functions#

onnxruntime can be used as a replacement for numpy. It can also be used to implement a training algorithm: onnxruntime-training differentiates an ONNX graph and runs it to compute the gradient. Simple functions are implemented in ONNX and run with onnxruntime to update the weights. function_onnx_graph returns many functions used to implement a training algorithm. The following benchmark compares a couple of implementations:

  • numpy: an implementation based on numpy, not optimized

  • sess: inference through an ONNX graph executed with method onnxruntime.InferenceSession.run

  • bind: inference through an ONNX graph executed with method onnxruntime.InferenceSession.run_with_iobinding

  • run: inference through an ONNX graph executed with method onnxruntime.InferenceSession.run_with_iobinding, but without counting the binding step, assuming the input buffers are reused and do not need to be bound again (see the sketch after this list)
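
A minimal sketch of the binding pattern with the public API (the benchmark below goes through onnxruntime's internal bindings to shave off a little overhead). model_bytes and the names X1, X2, alpha, Y are hypothetical placeholders for any serialized ONNX model:

import numpy
from onnxruntime import InferenceSession

sess = InferenceSession(model_bytes)  # model_bytes: hypothetical serialized model
bind = sess.io_binding()
# bind CPU inputs once; they can be reused across calls
bind.bind_cpu_input('X1', numpy.random.randn(10, 10).astype(numpy.float32))
bind.bind_cpu_input('X2', numpy.random.randn(10, 10).astype(numpy.float32))
bind.bind_cpu_input('alpha', numpy.array([0.5], dtype=numpy.float32))
# let onnxruntime allocate the output buffer
bind.bind_output('Y')
sess.run_with_iobinding(bind)
result = bind.copy_outputs_to_cpu()[0]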

axpy#

This function implements Y = f(X1, X2, \alpha) = \alpha X1 + X2.

import numpy
from scipy.special import expit
import pandas
from tqdm import tqdm
from cpyquickhelper.numbers.speed_measure import measure_time
import matplotlib.pyplot as plt
from onnxruntime import InferenceSession
from onnxruntime.capi._pybind_state import (  # pylint: disable=E0611
    SessionIOBinding, OrtDevice as C_OrtDevice,
    OrtValue as C_OrtValue)
from mlprodict.plotting.text_plot import onnx_simple_text_plot
from onnxcustom.utils.onnx_function import function_onnx_graph

fct_onx = function_onnx_graph("axpy")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=14
input: name='X1' type=dtype('float32') shape=None
input: name='X2' type=dtype('float32') shape=None
input: name='alpha' type=dtype('float32') shape=[1]
Mul(X1, alpha) -> Mu_C0
  Add(Mu_C0, X2) -> Y
output: name='Y' type=dtype('float32') shape=None

The numpy implementation is the following.

fct_numpy = lambda X1, X2, alpha: X1 * alpha + X2
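
Before timing anything, the two implementations can be checked against each other. A quick sanity check, not part of the original benchmark, could look like this:

# the ONNX graph and the numpy lambda should agree on random inputs
sess_check = InferenceSession(fct_onx.SerializeToString())
x1 = numpy.random.randn(5, 3).astype(numpy.float32)
x2 = numpy.random.randn(5, 3).astype(numpy.float32)
alpha = numpy.array([0.5], dtype=numpy.float32)
got = sess_check.run(None, {'X1': x1, 'X2': x2, 'alpha': alpha})[0]
numpy.testing.assert_allclose(fct_numpy(x1, x2, alpha), got, rtol=1e-5)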

The benchmark.

def reshape(a, dim):
    # Truncates a 2D array to its first dim rows; the effective size
    # is therefore capped by the number of rows allocated by the caller.
    if len(a.shape) == 2:
        return a[:dim].copy()
    return a


def bind_and_run(sess, bind, names, args, out_names, device):
    # Binds inputs and outputs, then runs: what 'bind_run' measures.
    for n, a in zip(names, args):
        bind.bind_ortvalue_input(n, a)
    for o in out_names:
        bind.bind_output(o, device)
    sess.run_with_iobinding(bind, None)
    return bind.get_outputs()


def nobind_just_run(sess, bind):
    # Runs with an already populated binding: what 'run' measures.
    sess.run_with_iobinding(bind, None)


def benchmark(name, onx, fct_numpy, *args,
              dims=(1, 10, 100, 200, 500, 1000, 2000, 10000)):
    sess = InferenceSession(onx.SerializeToString())
    device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    names = [i.name for i in sess.get_inputs()]
    out_names = [o.name for o in sess.get_outputs()]
    if len(names) != len(args):
        raise RuntimeError(
            f"Size mismatch {len(names)} != {len(args)}.")

    rows = []
    for dim in tqdm(dims):
        # inputs are allocated with at most 1000 rows, so every
        # dim >= 1000 measures the same effective size
        new_args = [reshape(a, dim) for a in args]
        ortvalues = [
            C_OrtValue.ortvalue_from_numpy(a, device)
            for a in new_args]

        # numpy implementation
        ms = measure_time(lambda: fct_numpy(*new_args),
                          repeat=50, number=100)
        ms.update(dict(name=name, impl='numpy', dim=dim))
        rows.append(ms)

        # InferenceSession.run
        inps = {n: a for n, a in zip(names, new_args)}
        ms = measure_time(lambda: sess.run(None, inps))
        ms.update(dict(name=name, impl='sess', dim=dim))
        rows.append(ms)

        # run_with_iobinding, binding included in the measure
        bind = SessionIOBinding(sess._sess)
        ms = measure_time(
            lambda: bind_and_run(
                sess._sess, bind, names, ortvalues, out_names, device))
        ms.update(dict(name=name, impl='bind_run', dim=dim))
        rows.append(ms)

        # run_with_iobinding, binding excluded from the measure
        ms = measure_time(
            lambda: nobind_just_run(sess._sess, bind))
        ms.update(dict(name=name, impl='run', dim=dim))
        rows.append(ms)

    return rows

Back to function axpy.

rows = benchmark(
    'axpy', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.array([0.5], dtype=numpy.float32))

all_rows = []
all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.002833  0.001893  0.001245  0.002739
   10    0.002918  0.001811  0.001366  0.002789
  100    0.002982  0.002316  0.001419  0.002946
  200    0.003037  0.002461  0.001483  0.003040
  500    0.003219  0.002727  0.001671  0.003252
 1000    0.003573  0.003620  0.002002  0.003690
 2000    0.003518  0.003588  0.001974  0.003612
10000    0.003556  0.003589  0.001994  0.003626


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: axpy, lower is better]

axpyw#

It implements Y, Z = f(X1, X2, G, \alpha, \beta) where Z = \beta G + \alpha X1 and Y = Z + X2.
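
This plausibly corresponds to one step of stochastic gradient descent with momentum: reading X1 as the gradient, X2 as the current weights, G as the accumulated velocity, and \alpha, \beta as a (negative) learning rate and a momentum term, Z is the updated velocity and Y the updated weights. axpyw2 below follows the same pattern with an extra \beta Z term, in the style of Nesterov momentum.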

fct_onx = function_onnx_graph("axpyw")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=14
input: name='X1' type=dtype('float32') shape=None
input: name='X2' type=dtype('float32') shape=None
input: name='G' type=dtype('float32') shape=None
input: name='alpha' type=dtype('float32') shape=[1]
input: name='beta' type=dtype('float32') shape=[1]
Mul(X1, alpha) -> Mu_C0
Mul(G, beta) -> Mu_C02
  Add(Mu_C0, Mu_C02) -> Z
    Add(Z, X2) -> Y
output: name='Y' type=dtype('float32') shape=None
output: name='Z' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x1, x2, g, alpha, beta: (
    x1 * alpha + x2 + beta * g, x1 * alpha + beta * g)

rows = benchmark(
    'axpyw', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.array([0.5], dtype=numpy.float32),
    numpy.array([0.5], dtype=numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.003720  0.005761  0.001453  0.003533
   10    0.003714  0.005803  0.001468  0.003539
  100    0.003876  0.007384  0.001589  0.003775
  200    0.004040  0.007983  0.001758  0.003973
  500    0.004375  0.009317  0.002095  0.004542
 1000    0.005165  0.011760  0.002720  0.005728
 2000    0.004997  0.011836  0.002697  0.005443
10000    0.005110  0.011962  0.002717  0.005578


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: axpyw, lower is better]

axpyw2#

It implements Y, Z = f(X1, X2, G, \alpha, \beta) where Z = \beta G + \alpha X1 and Y = \beta Z + \alpha X1 + X2.

fct_onx = function_onnx_graph("axpyw2")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=14
input: name='X1' type=dtype('float32') shape=None
input: name='X2' type=dtype('float32') shape=None
input: name='G' type=dtype('float32') shape=None
input: name='alpha' type=dtype('float32') shape=[1]
input: name='beta' type=dtype('float32') shape=[1]
Mul(X1, alpha) -> Mu_C0
Mul(G, beta) -> Mu_C03
  Add(Mu_C0, Mu_C03) -> Z
    Mul(Z, beta) -> Mu_C02
  Add(Mu_C0, Mu_C02) -> Ad_C0
    Add(Ad_C0, X2) -> Y
output: name='Y' type=dtype('float32') shape=None
output: name='Z' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x1, x2, g, alpha, beta: (
    x1 * alpha + x2 + beta * (x1 * alpha + beta * g),
    x1 * alpha + beta * g)

rows = benchmark(
    'axpyw2', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.array([0.5], dtype=numpy.float32),
    numpy.array([0.5], dtype=numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.003981  0.008244  0.001662  0.003784
   10    0.004026  0.008272  0.001693  0.003777
  100    0.004196  0.010704  0.001866  0.004124
  200    0.004431  0.011405  0.002078  0.004316
  500    0.004958  0.013346  0.002629  0.004968
 1000    0.005983  0.017157  0.003522  0.006676
 2000    0.005902  0.017183  0.003532  0.006612
10000    0.005970  0.017097  0.003529  0.006772


copy#

It implements a copy.

fct_onx = function_onnx_graph("copy")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=15
input: name='X' type=dtype('float32') shape=None
Identity(X) -> Y
output: name='Y' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x: x.copy()

rows = benchmark(
    'copy', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.001927  0.000438  0.000788  0.001920
   10    0.001931  0.000436  0.000796  0.001925
  100    0.001953  0.000575  0.000806  0.001997
  200    0.001971  0.000631  0.000833  0.002032
  500    0.002017  0.000751  0.000874  0.002131
 1000    0.002087  0.000869  0.000953  0.002267
 2000    0.002103  0.000892  0.000949  0.002271
10000    0.002088  0.000901  0.000953  0.002275


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: copy, lower is better]

grad_loss_absolute_error#

It implements Y = f(X1, X2) = \lVert X1 - X2 \rVert and its gradient.
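
The second output of the graph below, Y_grad, is the (sub)gradient of this loss with respect to X1: \frac{\partial}{\partial X1} \lVert X1 - X2 \rVert = sign(X1 - X2), which is what both the Sign node and numpy.sign compute.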

fct_onx = function_onnx_graph("grad_loss_absolute_error")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=14
input: name='X1' type=dtype('float32') shape=[None, None]
input: name='X2' type=dtype('float32') shape=[None, None]
init: name='Re_Reshapecst' type=dtype('int64') shape=(1,) -- array([-1])
Sub(X1, X2) -> Su_C0
  Abs(Su_C0) -> Ab_Y0
    ReduceSum(Ab_Y0) -> Re_reduced0
      Reshape(Re_reduced0, Re_Reshapecst) -> Y
  Sign(Su_C0) -> Y_grad
output: name='Y' type=dtype('float32') shape=None
output: name='Y_grad' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x1, x2: (
    numpy.abs(x1 - x2).sum(), numpy.sign(x1 - x2))

rows = benchmark(
    'grad_loss_absolute_error', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.003387  0.003930  0.001706  0.003222
   10    0.003424  0.003962  0.001739  0.003238
  100    0.003683  0.005327  0.002006  0.003602
  200    0.003976  0.006018  0.002306  0.003922
  500    0.004880  0.007797  0.003191  0.004851
 1000    0.006326  0.011093  0.004634  0.006537
 2000    0.006318  0.011139  0.004635  0.006536
10000    0.006336  0.011175  0.004629  0.006430


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: grad_loss_absolute_error, lower is better]

grad_loss_square_error#

It implements Y = f(X1, X2) = \lVert X1 - X2 \rVert^2 and its gradient.
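
Here Y_grad is the gradient taken with respect to X2 (presumably the prediction): \frac{\partial}{\partial X2} \lVert X1 - X2 \rVert^2 = -2 (X1 - X2), hence the constant -2 in both implementations below.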

fct_onx = function_onnx_graph("grad_loss_square_error")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=14
input: name='X1' type=dtype('float32') shape=[None, None]
input: name='X2' type=dtype('float32') shape=[None, None]
init: name='Mu_Mulcst' type=dtype('float32') shape=(1,) -- array([1.], dtype=float32)
init: name='Re_Reshapecst' type=dtype('int64') shape=(1,) -- array([-1])
init: name='Mu_Mulcst1' type=dtype('float32') shape=(1,) -- array([-2.], dtype=float32)
Sub(X1, X2) -> Su_C0
  Mul(Su_C0, Mu_Mulcst1) -> Y_grad
ReduceSumSquare(Su_C0) -> Re_reduced0
  Mul(Re_reduced0, Mu_Mulcst) -> Mu_C0
    Reshape(Mu_C0, Re_Reshapecst) -> Y
output: name='Y' type=dtype('float32') shape=None
output: name='Y_grad' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x1, x2: (
    ((x1 - x2) ** 2).sum(), (x1 - x2) * (-2))

rows = benchmark(
    'grad_loss_square_error', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.003494  0.004843  0.001738  0.003297
   10    0.003480  0.004872  0.001736  0.003300
  100    0.003588  0.006033  0.001816  0.003457
  200    0.003659  0.006345  0.001911  0.003564
  500    0.003915  0.007300  0.002166  0.003863
 1000    0.004307  0.009071  0.002562  0.004470
 2000    0.004314  0.008895  0.002561  0.004335
10000    0.004320  0.008967  0.002563  0.004391


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: grad_loss_square_error, lower is better]

grad_loss_elastic_error#

It implements Y = f(X1, X2) = \beta \lVert X1 - X2 \rVert + \alpha \lVert X1 - X2 \rVert^2, or Y = f(X1, X2) = \beta \lVert w(X1 - X2) \rVert + \alpha \lVert \sqrt{w}(X1 - X2) \rVert^2 if weight_name is not None, and its gradient. l1_weight is \beta and l2_weight is \alpha.

fct_onx = function_onnx_graph("grad_loss_elastic_error")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=15
input: name='X1' type=dtype('float32') shape=[None, None]
input: name='X2' type=dtype('float32') shape=[None, None]
init: name='Mu_Mulcst' type=dtype('float32') shape=(1,) -- array([0.01], dtype=float32)
init: name='Re_Reshapecst' type=dtype('int64') shape=(1,) -- array([-1])
init: name='Mu_Mulcst3' type=dtype('float32') shape=(1,) -- array([-0.02], dtype=float32)
Identity(Mu_Mulcst) -> Mu_Mulcst1
Sub(X1, X2) -> Su_C0
  Abs(Su_C0) -> Ab_Y0
    Mul(Ab_Y0, Mu_Mulcst) -> Mu_C0
Identity(Mu_Mulcst) -> Mu_Mulcst2
Mul(Su_C0, Mu_Mulcst3) -> Mu_C05
Sign(Su_C0) -> Si_output0
  Mul(Si_output0, Mu_Mulcst2) -> Mu_C04
  Add(Mu_C04, Mu_C05) -> Ad_C02
    Identity(Ad_C02) -> Y_grad
  Mul(Su_C0, Su_C0) -> Mu_C03
  Mul(Mu_C03, Mu_Mulcst1) -> Mu_C02
    Add(Mu_C0, Mu_C02) -> Ad_C0
      ReduceSum(Ad_C0) -> Re_reduced0
        Reshape(Re_reduced0, Re_Reshapecst) -> Y
output: name='Y' type=dtype('float32') shape=None
output: name='Y_grad' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x1, x2: (
    numpy.abs(x1 - x2).sum() * 0.1 + ((x1 - x2) ** 2).sum() * 0.9,
    numpy.sign(x1 - x2) * 0.1 - 2 * 0.9 * (x1 - x2))

rows = benchmark(
    'grad_loss_elastic_error', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.004848  0.012548  0.003174  0.004676
   10    0.004882  0.012637  0.003201  0.004667
  100    0.005349  0.015723  0.003673  0.005219
  200    0.005931  0.016927  0.004241  0.005801
  500    0.007472  0.019979  0.005782  0.007477
 1000    0.010102  0.025098  0.008284  0.010244
 2000    0.010109  0.025379  0.008308  0.010165
10000    0.010100  0.025230  0.008299  0.010251


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: grad_loss_elastic_error, lower is better]

n_penalty_elastic_error#

It implements Y = f(W) = \beta \lVert W \rVert + \alpha \lVert W \rVert^2, where l1_weight is \beta and l2_weight is \alpha. It does that for n_tensors tensors and adds all the results to an input loss.

fct_onx = function_onnx_graph("n_penalty_elastic_error")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=15
input: name='loss' type=dtype('float32') shape=[1, 1]
input: name='W0' type=dtype('float32') shape=None
init: name='Mu_Mulcst' type=dtype('float32') shape=(1,) -- array([0.01], dtype=float32)
init: name='Re_Reshapecst' type=dtype('int64') shape=(1,) -- array([-1])
Abs(W0) -> Ab_Y0
  ReduceSum(Ab_Y0) -> Re_reduced0
    Mul(Re_reduced0, Mu_Mulcst) -> Mu_C0
ReduceSumSquare(W0) -> Re_reduced02
Identity(Mu_Mulcst) -> Mu_Mulcst1
  Mul(Re_reduced02, Mu_Mulcst1) -> Mu_C02
    Add(Mu_C0, Mu_C02) -> Ad_C01
      Add(loss, Ad_C01) -> Ad_C0
        Reshape(Ad_C0, Re_Reshapecst) -> Y
output: name='Y' type=dtype('float32') shape=[None]
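
With the weights printed above (both initializers equal 0.01), the graph computes Y = loss + 0.01 \lVert W0 \rVert + 0.01 \lVert W0 \rVert^2, a worked instance of the formula with \alpha = \beta = 0.01.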

benchmark

fct_numpy = lambda loss, x: numpy.abs(x).sum() * 0.1 + ((x) ** 2).sum() * 0.9

rows = benchmark(
    'n_penalty_elastic_error', fct_onx, fct_numpy,
    numpy.array([[0.5]], dtype=numpy.float32),
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.003370  0.007245  0.001944  0.003250
   10    0.003373  0.007273  0.001940  0.003216
  100    0.003419  0.008166  0.002002  0.003307
  200    0.003480  0.008448  0.002063  0.003385
  500    0.003664  0.009188  0.002255  0.003565
 1000    0.003944  0.010253  0.002534  0.003846
 2000    0.003975  0.010276  0.002547  0.003814
10000    0.003946  0.010183  0.002527  0.003809


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: n_penalty_elastic_error, lower is better]

update_penalty_elastic_error#

It implements Y = f(W) = W - 2 \beta W - \alpha sign(W), where l1 is \beta and l2 is \alpha.
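
Rewritten as Y = (1 - 2\beta) W - \alpha sign(W), this explains the two constants in the graph below: with l1 = l2 = 10^{-4}, apparently the defaults here, 1 - 2\beta = 0.9998 and \alpha = 10^{-4}.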

fct_onx = function_onnx_graph("update_penalty_elastic_error")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=14
input: name='X' type=dtype('float32') shape=None
init: name='Mu_Mulcst' type=dtype('float32') shape=(1,) -- array([0.9998], dtype=float32)
init: name='Mu_Mulcst1' type=dtype('float32') shape=(1,) -- array([1.e-04], dtype=float32)
Mul(X, Mu_Mulcst) -> Mu_C0
Sign(X) -> Si_output0
  Mul(Si_output0, Mu_Mulcst1) -> Mu_C02
  Sub(Mu_C0, Mu_C02) -> Y
output: name='Y' type=dtype('float32') shape=None

benchmark

fct_numpy = lambda x: numpy.sign(x) * 0.1 + (x * 0.9 * 2)

rows = benchmark(
    'update_penalty_elastic_error', fct_onx, fct_numpy,
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.002541  0.004873  0.001383  0.002528
   10    0.002559  0.004914  0.001407  0.002505
  100    0.002847  0.006418  0.001697  0.002879
  200    0.003185  0.007086  0.002019  0.003197
  500    0.004114  0.008970  0.002958  0.004227
 1000    0.005687  0.011972  0.004515  0.005834
 2000    0.005694  0.011943  0.004541  0.005832
10000    0.005674  0.012059  0.004516  0.005823


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: update_penalty_elastic_error, lower is better]

grad_sigmoid_neg_log_loss_error#

See _onnx_grad_sigmoid_neg_log_loss_error.
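
For a label y \in \{0, 1\} and a raw score x, the loss is -y \log(\sigma(x)) - (1 - y) \log(1 - \sigma(x)) with \sigma the sigmoid, and its derivative with respect to x simplifies to \sigma(x) - y. That is exactly the Y_grad output below (\sigma(X2) - X1, up to clipping) and the expit(x2) - x1 term in the numpy implementation.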

fct_onx = function_onnx_graph("grad_sigmoid_neg_log_loss_error")
print(onnx_simple_text_plot(fct_onx))
opset: domain='' version=15
input: name='X1' type=dtype('int64') shape=[None, None]
input: name='X2' type=dtype('float32') shape=[None, None]
init: name='Su_Subcst' type=dtype('float32') shape=(1,) -- array([1.], dtype=float32)
init: name='Cl_Clipcst' type=dtype('float32') shape=(1,) -- array([1.e-05], dtype=float32)
init: name='Cl_Clipcst1' type=dtype('float32') shape=(1,) -- array([0.99999], dtype=float32)
init: name='Re_Reshapecst' type=dtype('int64') shape=(1,) -- array([-1])
Cast(X1, to=1) -> Ca_output0
  Sub(Su_Subcst, Ca_output0) -> Su_C0
Identity(Su_Subcst) -> Su_Subcst1
Sigmoid(X2) -> Si_Y0
  Clip(Si_Y0, Cl_Clipcst, Cl_Clipcst1) -> Cl_output0
  Sub(Su_Subcst1, Cl_output0) -> Su_C02
    Log(Su_C02) -> Lo_output0
    Mul(Su_C0, Lo_output0) -> Mu_C0
Log(Cl_output0) -> Lo_output02
  Mul(Ca_output0, Lo_output02) -> Mu_C02
    Add(Mu_C0, Mu_C02) -> Ad_C0
      Neg(Ad_C0) -> Ne_Y0
        ReduceSum(Ne_Y0) -> Re_reduced0
          Reshape(Re_reduced0, Re_Reshapecst) -> Y
  Sub(Cl_output0, Ca_output0) -> Y_grad
output: name='Y' type=dtype('float32') shape=None
output: name='Y_grad' type=dtype('float32') shape=None

benchmark

def loss(x1, x2, eps=1e-5):
    pr = expit(x2)
    cl = numpy.clip(pr, eps, 1 - eps)
    lo = - (1 - x1) * numpy.log(1 - cl) - x1 * numpy.log(cl)
    return lo


fct_numpy = lambda x1, x2: (loss(x1, x2).mean(), expit(x2) - x1)

rows = benchmark(
    'grad_sigmoid_neg_log_loss_error', fct_onx, fct_numpy,
    (numpy.random.randn(1000, 1) > 0).astype(numpy.int64),
    numpy.random.randn(1000, 10).astype(numpy.float32))

all_rows.extend(rows)
piv = pandas.DataFrame(rows).pivot('dim', 'impl', 'average')
piv
  dim    bind_run     numpy       run      sess
    1    0.005049  0.023152  0.003357  0.004953
   10    0.005506  0.024440  0.003780  0.005352
  100    0.008368  0.039208  0.006604  0.008238
  200    0.011539  0.053302  0.009758  0.011473
  500    0.020948  0.096186  0.019108  0.020879
 1000    0.036621  0.172751  0.034712  0.036714
 2000    0.036585  0.172595  0.034682  0.036642
10000    0.036655  0.172523  0.034723  0.036637


Graph.

name = rows[0]['name']
ax = piv.plot(logx=True, logy=True)
ax.set_title(name + "\nlower is better")
[plot: grad_sigmoid_neg_log_loss_error, lower is better]

Results#

df = pandas.DataFrame(all_rows)
df
      average  deviation  min_exec  max_exec  repeat  number     ttime  context_size                             name      impl    dim
0    0.001893   0.000577  0.001798  0.005928      50     100  0.094635            64                             axpy     numpy      1
1    0.002739   0.000060  0.002708  0.002918      10      50  0.027390            64                             axpy      sess      1
2    0.002833   0.000035  0.002809  0.002932      10      50  0.028326            64                             axpy  bind_run      1
3    0.001245   0.000014  0.001234  0.001273      10      50  0.012448            64                             axpy       run      1
4    0.001811   0.000010  0.001800  0.001860      50     100  0.090539            64                             axpy     numpy     10
..        ...        ...       ...       ...     ...     ...       ...           ...                              ...       ...    ...
315  0.034682   0.000045  0.034644  0.034809      10      50  0.346822            64  grad_sigmoid_neg_log_loss_error       run   2000
316  0.172523   0.000231  0.172332  0.173909      50     100  8.626164            64  grad_sigmoid_neg_log_loss_error     numpy  10000
317  0.036637   0.000091  0.036597  0.036907      10      50  0.366371            64  grad_sigmoid_neg_log_loss_error      sess  10000
318  0.036655   0.000058  0.036620  0.036819      10      50  0.366554            64  grad_sigmoid_neg_log_loss_error  bind_run  10000
319  0.034723   0.000045  0.034682  0.034844      10      50  0.347230            64  grad_sigmoid_neg_log_loss_error       run  10000

320 rows × 11 columns



Pivot.

piv = pandas.pivot_table(
    df, index=['name', 'impl'], columns='dim', values='average')
piv
print(piv)
dim                                          1      ...     10000
name                            impl                ...
axpy                            bind_run  0.002833  ...  0.003556
                                numpy     0.001893  ...  0.003589
                                run       0.001245  ...  0.001994
                                sess      0.002739  ...  0.003626
axpyw                           bind_run  0.003720  ...  0.005110
                                numpy     0.005761  ...  0.011962
                                run       0.001453  ...  0.002717
                                sess      0.003533  ...  0.005578
axpyw2                          bind_run  0.003981  ...  0.005970
                                numpy     0.008244  ...  0.017097
                                run       0.001662  ...  0.003529
                                sess      0.003784  ...  0.006772
copy                            bind_run  0.001927  ...  0.002088
                                numpy     0.000438  ...  0.000901
                                run       0.000788  ...  0.000953
                                sess      0.001920  ...  0.002275
grad_loss_absolute_error        bind_run  0.003387  ...  0.006336
                                numpy     0.003930  ...  0.011175
                                run       0.001706  ...  0.004629
                                sess      0.003222  ...  0.006430
grad_loss_elastic_error         bind_run  0.004848  ...  0.010100
                                numpy     0.012548  ...  0.025230
                                run       0.003174  ...  0.008299
                                sess      0.004676  ...  0.010251
grad_loss_square_error          bind_run  0.003494  ...  0.004320
                                numpy     0.004843  ...  0.008967
                                run       0.001738  ...  0.002563
                                sess      0.003297  ...  0.004391
grad_sigmoid_neg_log_loss_error bind_run  0.005049  ...  0.036655
                                numpy     0.023152  ...  0.172523
                                run       0.003357  ...  0.034723
                                sess      0.004953  ...  0.036637
n_penalty_elastic_error         bind_run  0.003370  ...  0.003946
                                numpy     0.007245  ...  0.010183
                                run       0.001944  ...  0.002527
                                sess      0.003250  ...  0.003809
update_penalty_elastic_error    bind_run  0.002541  ...  0.005674
                                numpy     0.004873  ...  0.012059
                                run       0.001383  ...  0.004516
                                sess      0.002528  ...  0.005823

[40 rows x 8 columns]

Graph.

fig, ax = None, None


for i, name in enumerate(sorted(set(df['name']))):
    if fig is None:
        fig, ax = plt.subplots(2, 2, figsize=(8, 12), sharex=True)
    x, y = (i % 4) // 2, (i % 4) % 2
    piv = df[df.name == name].pivot('dim', 'impl', 'average')
    piv.plot(ax=ax[x, y], logx=True, logy=True)
    ax[x, y].set_title(name)
    ax[x, y].xaxis.set_label_text("")
    if i % 4 == 3:
        fig.suptitle("lower is better")
        fig.tight_layout()
        fig, ax = None, None


if fig is not None:
    fig.suptitle("lower is better")
    fig.tight_layout()


# plt.show()
  • [figure: axpy, axpyw, axpyw2, copy (lower is better)]
  • [figure: grad_loss_absolute_error, grad_loss_elastic_error, grad_loss_square_error, grad_sigmoid_neg_log_loss_error (lower is better)]
  • [figure: n_penalty_elastic_error, update_penalty_elastic_error (lower is better)]

Total running time of the script: (1 minute 45.916 seconds)

Gallery generated by Sphinx-Gallery