{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Optimisation de code avec cffi, numba, cython\n", "\n", "L'id\u00e9e est de recoder une fonction en C. On prend comme exemple la fonction de pr\u00e9diction de la r\u00e9gression lin\u00e9aire de [scikit-learn](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html) et de pr\u00e9voir le gain de temps qu'on obtient en recodant la fonction dans un langage plus rapide."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"data": {"text/html": ["
run previous cell, wait for 2 seconds
\n", ""], "text/plain": [""]}, "execution_count": 2, "metadata": {}, "output_type": "execute_result"}], "source": ["from jyquickhelper import add_notebook_menu\n", "add_notebook_menu()"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": ["memo_time = []\n", "import timeit\n", "\n", "def unit(x):\n", " if x >= 1: return \"%1.2f s\" % x\n", " elif x >= 1e-3: return \"%1.2f ms\" % (x* 1000)\n", " elif x >= 1e-6: return \"%1.2f \u00b5s\" % (x* 1000**2)\n", " elif x >= 1e-9: return \"%1.2f ns\" % (x* 1000**3)\n", " else:\n", " return \"%1.2g s\" % x\n", "\n", "def timeexe(legend, code, number=100, repeat=1000):\n", " rep = timeit.repeat(code, number=number, repeat=repeat, globals=globals())\n", " ave = sum(rep) / (number * repeat)\n", " std = (sum((x/number - ave)**2 for x in rep) / repeat)**0.5\n", " fir = rep[0]/number\n", " fir3 = sum(rep[:3]) / (3 * number)\n", " las3 = sum(rep[-3:]) / (3 * number)\n", " rep.sort()\n", " mini = rep[len(rep)//20] / number\n", " maxi = rep[-len(rep)//20] / number\n", " print(\"Moyenne: %s Ecart-type %s (with %d runs) in [%s, %s]\" % (\n", " unit(ave), unit(std), number, unit(mini), unit(maxi)))\n", " return dict(legend=legend, average=ave, deviation=std, first=fir, first3=fir3,\n", " last3=las3, repeat=repeat, min5=mini, max5=maxi, code=code, run=number)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## R\u00e9gression lin\u00e9aire"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": ["from sklearn.datasets import load_diabetes\n", "from sklearn.model_selection import train_test_split\n", "\n", "diabetes = load_diabetes()\n", "diabetes_X_train, diabetes_X_test, diabetes_y_train, diabetes_y_test = train_test_split(diabetes.data, diabetes.target)"]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [{"data": {"text/plain": ["LinearRegression()"]}, "execution_count": 5, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.linear_model import LinearRegression\n", "clr = LinearRegression()\n", "clr.fit(diabetes_X_train, diabetes_y_train)"]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([ -35.81159278, -267.39308261, 503.56121841, 337.87944184,\n", " -577.27255236, 373.62939477, -99.69779327, 78.39842094,\n", " 656.54309153, 80.3383998 ])"]}, "execution_count": 6, "metadata": {}, "output_type": "execute_result"}], "source": ["clr.coef_"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"data": {"text/plain": ["152.69613239933642"]}, "execution_count": 7, "metadata": {}, "output_type": "execute_result"}], "source": ["clr.intercept_"]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 45.50 \u00b5s Ecart-type 6.34 \u00b5s (with 100 runs) in [40.87 \u00b5s, 52.95 \u00b5s]\n"]}], "source": ["z = diabetes_X_test[0:1,:]\n", "memo_time.append(timeexe(\"sklearn.predict\", \"clr.predict(z)\"))"]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["45.2 \u00b5s \u00b1 744 ns per loop (mean \u00b1 std. dev. of 7 runs, 10,000 loops each)\n"]}], "source": ["%timeit clr.predict(z)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### optimisation avec cffi\n", "\n", "On s'inspire de l'exemple [Purely for performance (API level, out-of-line)](http://cffi.readthedocs.io/en/latest/overview.html?highlight=example#purely-for-performance-api-level-out-of-line)."]}, {"cell_type": "code", "execution_count": 9, "metadata": {"scrolled": false}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["generating .\\_linear_regression.c\n", "(already up-to-date)\n", "the current directory is 'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a'\n", "running build_ext\n", "building '_linear_regression' extension\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\cl.exe /c /nologo /Ox /W3 /GL /DNDEBUG /MD -IC:\\Python395_x64\\include -IC:\\Python395_x64\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\include -IC:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt /Tc_linear_regression.c /Fo.\\Release\\_linear_regression.obj\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\link.exe /nologo /INCREMENTAL:NO /LTCG /DLL /MANIFEST:EMBED,ID=2 /MANIFESTUAC:NO /LIBPATH:C:\\Python395_x64\\libs /LIBPATH:C:\\Python395_x64\\PCbuild\\amd64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\lib\\um\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\ucrt\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\um\\x64 /EXPORT:PyInit__linear_regression .\\Release\\_linear_regression.obj /OUT:.\\_linear_regression.cp39-win_amd64.pyd /IMPLIB:.\\Release\\_linear_regression.cp39-win_amd64.lib\n"]}, {"data": {"text/plain": ["'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a\\\\_linear_regression.cp39-win_amd64.pyd'"]}, "execution_count": 10, "metadata": {}, "output_type": "execute_result"}], "source": ["from cffi import FFI\n", "ffibuilder = FFI()\n", "\n", "ffibuilder.cdef(\"int linreg(int, double *, double *, double, double *);\")\n", "\n", "ffibuilder.set_source(\"_linear_regression\",\n", "r\"\"\"\n", " static int linreg(int dimension, double * x, double *coef, double intercept, double * out)\n", " {\n", " for(; dimension > 0; --dimension, ++x, ++coef)\n", " intercept += *x * *coef;\n", " *out = intercept;\n", " return 1;\n", " }\n", "\"\"\")\n", "\n", "ffibuilder.compile(verbose=True)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La fonction compil\u00e9e est accessible comme suit."]}, {"cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [{"data": {"text/plain": [""]}, "execution_count": 11, "metadata": {}, "output_type": "execute_result"}], "source": ["from _linear_regression import ffi, lib\n", "lib.linreg"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On s'inspire de l'exemple [How to pass a Numpy array into a cffi function and how to get one back out?](https://stackoverflow.com/questions/16276268/how-to-pass-a-numpy-array-into-a-cffi-function-and-how-to-get-one-back-out)."]}, {"cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": ["import numpy\n", "out = numpy.zeros(1)"]}, {"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": ["ptr_coef = clr.coef_.__array_interface__['data'][0]\n", "cptr_coef = ffi.cast ( \"double*\" , ptr_coef )"]}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": ["x = diabetes_X_test[0:1,:]\n", "ptr_x = x.__array_interface__['data'][0]\n", "cptr_x = ffi.cast ( \"double*\" , ptr_x )"]}, {"cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": ["ptr_out = out.__array_interface__['data'][0]\n", "cptr_out = ffi.cast ( \"double*\" , ptr_out )"]}, {"cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [{"data": {"text/plain": ["1"]}, "execution_count": 16, "metadata": {}, "output_type": "execute_result"}], "source": ["n = len(clr.coef_)\n", "lib.linreg(n, cptr_x, cptr_coef, clr.intercept_, cptr_out)"]}, {"cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([214.72477745])"]}, "execution_count": 17, "metadata": {}, "output_type": "execute_result"}], "source": ["out"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On v\u00e9rifie qu'on obtient bien la m\u00eame chose."]}, {"cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([214.72477745])"]}, "execution_count": 18, "metadata": {}, "output_type": "execute_result"}], "source": ["clr.predict(x)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Et on mesure le temps d'ex\u00e9cution :"]}, {"cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 831.37 ns Ecart-type 708.08 ns (with 100 runs) in [416.00 ns, 1.52 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"cffi-linreg\", \"lib.linreg(n, cptr_x, cptr_coef, clr.intercept_, cptr_out)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["C'est beaucoup plus rapide. Pour \u00eatre totalement honn\u00eate, il faut mesurer les \u00e9tapes qui consiste \u00e0 extraire les pointeurs."]}, {"cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([154.32457426])"]}, "execution_count": 20, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr(x, clr):\n", " out = numpy.zeros(1)\n", " ptr_coef = clr.coef_.__array_interface__['data'][0]\n", " cptr_coef = ffi.cast ( \"double*\" , ptr_coef )\n", " ptr_x = x.__array_interface__['data'][0]\n", " cptr_x = ffi.cast ( \"double*\" , ptr_x ) \n", " ptr_out = out.__array_interface__['data'][0]\n", " cptr_out = ffi.cast ( \"double*\" , ptr_out ) \n", " lib.linreg(len(x), cptr_x, cptr_coef, clr.intercept_, cptr_out)\n", " return out\n", "\n", "predict_clr(x, clr)"]}, {"cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 7.52 \u00b5s Ecart-type 2.34 \u00b5s (with 100 runs) in [6.20 \u00b5s, 10.42 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"cffi-linreg-wrapped\", \"predict_clr(x, clr)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Cela reste plus rapide."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### cffi - seconde version\n", "\n", "Comme on construit la fonction en dynamique (le code est connu lors de l'ex\u00e9cution), on peut facilement se passer de la boucle et \u00e9crire le code sans boucle et avec les coefficients."]}, {"cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [{"data": {"text/plain": ["'-35.81159277952622*x[0] + -267.39308260812277*x[1] + 503.56121841083586*x[2] + 337.87944183803455*x[3] + -577.2725523621144*x[4] + 373.6293947654621*x[5] + -99.69779326605845*x[6] + 78.39842093764699*x[7] + 656.5430915289373*x[8] + 80.33839980437061*x[9]'"]}, "execution_count": 22, "metadata": {}, "output_type": "execute_result"}], "source": ["res = \" + \".join(\"{0}*x[{1}]\".format(c, i) for i, c in enumerate(clr.coef_))\n", "res"]}, {"cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", " static int linreg_custom(double * x, double * out)\n", " {\n", " out[0] = 152.69613239933642 + -35.81159277952622*x[0] + -267.39308260812277*x[1] + 503.56121841083586*x[2] + 337.87944183803455*x[3] + -577.2725523621144*x[4] + 373.6293947654621*x[5] + -99.69779326605845*x[6] + 78.39842093764699*x[7] + 656.5430915289373*x[8] + 80.33839980437061*x[9];\n", " }\n", "\n"]}], "source": ["code = \"\"\"\n", " static int linreg_custom(double * x, double * out)\n", " {{\n", " out[0] = {0} + {1};\n", " }}\n", "\"\"\".format(clr.intercept_, res)\n", "print(code)"]}, {"cell_type": "code", "execution_count": 23, "metadata": {"scrolled": false}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["generating .\\_linear_regression_custom.c\n", "the current directory is 'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a'\n", "running build_ext\n", "building '_linear_regression_custom' extension\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\cl.exe /c /nologo /Ox /W3 /GL /DNDEBUG /MD -IC:\\Python395_x64\\include -IC:\\Python395_x64\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\include -IC:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt /Tc_linear_regression_custom.c /Fo.\\Release\\_linear_regression_custom.obj\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\link.exe /nologo /INCREMENTAL:NO /LTCG /DLL /MANIFEST:EMBED,ID=2 /MANIFESTUAC:NO /LIBPATH:C:\\Python395_x64\\libs /LIBPATH:C:\\Python395_x64\\PCbuild\\amd64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\lib\\um\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\ucrt\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\um\\x64 /EXPORT:PyInit__linear_regression_custom .\\Release\\_linear_regression_custom.obj /OUT:.\\_linear_regression_custom.cp39-win_amd64.pyd /IMPLIB:.\\Release\\_linear_regression_custom.cp39-win_amd64.lib\n"]}, {"data": {"text/plain": ["'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a\\\\_linear_regression_custom.cp39-win_amd64.pyd'"]}, "execution_count": 24, "metadata": {}, "output_type": "execute_result"}], "source": ["from cffi import FFI\n", "ffibuilder = FFI()\n", "\n", "ffibuilder.cdef(\"int linreg_custom(double *, double *);\")\n", "ffibuilder.set_source(\"_linear_regression_custom\", code)\n", "ffibuilder.compile(verbose=True)"]}, {"cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([214.72477745])"]}, "execution_count": 25, "metadata": {}, "output_type": "execute_result"}], "source": ["from _linear_regression_custom.lib import linreg_custom\n", "linreg_custom(cptr_x, cptr_out)\n", "out"]}, {"cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 466.52 ns Ecart-type 851.96 ns (with 100 runs) in [315.00 ns, 715.00 ns]\n"]}], "source": ["memo_time.append(timeexe(\"cffi-linreg-custom\", \"linreg_custom(cptr_x, cptr_out)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On a gagn\u00e9 un facteur 2."]}, {"cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([214.72477745])"]}, "execution_count": 27, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_custom(x):\n", " out = numpy.zeros(1)\n", " ptr_x = x.__array_interface__['data'][0]\n", " cptr_x = ffi.cast(\"double*\", ptr_x)\n", " ptr_out = out.__array_interface__['data'][0]\n", " cptr_out = ffi.cast(\"double*\", ptr_out)\n", " linreg_custom(cptr_x, cptr_out)\n", " return out\n", "\n", "predict_clr_custom(x)"]}, {"cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 5.27 \u00b5s Ecart-type 1.82 \u00b5s (with 100 runs) in [4.42 \u00b5s, 7.77 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"cffi-linreg-custom wrapped\", \"predict_clr_custom(x)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["C'est un peu plus rapide."]}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["### et en float?\n", "\n", "L'ordinateur fait la distinction entre les [double](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) code sur 64 bit et les [float](https://en.wikipedia.org/wiki/Single-precision_floating-point_format) cod\u00e9 sur 32 bits. La pr\u00e9cision est meilleure dans le premier cas et les calculs sont plus rapides dans le second. Dans le cas du machine learning, on pr\u00e9f\u00e8re la rapidit\u00e9 \u00e0 une perte pr\u00e9cision en pr\u00e9cision qui est souvent compens\u00e9e par l'optimisation inh\u00e9rente \u00e0 tout probl\u00e8me de machine learning. Ce qu'on perd sur une observation, on le retrouve sur une autre."]}, {"cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [{"data": {"text/plain": ["'-35.81159277952622f*x[0] + -267.39308260812277f*x[1] + 503.56121841083586f*x[2] + 337.87944183803455f*x[3] + -577.2725523621144f*x[4] + 373.6293947654621f*x[5] + -99.69779326605845f*x[6] + 78.39842093764699f*x[7] + 656.5430915289373f*x[8] + 80.33839980437061f*x[9]'"]}, "execution_count": 29, "metadata": {}, "output_type": "execute_result"}], "source": ["res = \" + \".join(\"{0}f*x[{1}]\".format(c, i) for i, c in enumerate(clr.coef_))\n", "res"]}, {"cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", " static int linreg_custom_float(float * x, float * out)\n", " {\n", " out[0] = 152.69613239933642f + -35.81159277952622f*x[0] + -267.39308260812277f*x[1] + 503.56121841083586f*x[2] + 337.87944183803455f*x[3] + -577.2725523621144f*x[4] + 373.6293947654621f*x[5] + -99.69779326605845f*x[6] + 78.39842093764699f*x[7] + 656.5430915289373f*x[8] + 80.33839980437061f*x[9];\n", " }\n", "\n"]}], "source": ["code = \"\"\"\n", " static int linreg_custom_float(float * x, float * out)\n", " {{\n", " out[0] = {0}f + {1};\n", " }}\n", "\"\"\".format(clr.intercept_, res)\n", "print(code)"]}, {"cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["generating .\\_linear_regression_custom_float.c\n", "the current directory is 'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a'\n", "running build_ext\n", "building '_linear_regression_custom_float' extension\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\cl.exe /c /nologo /Ox /W3 /GL /DNDEBUG /MD -IC:\\Python395_x64\\include -IC:\\Python395_x64\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\include -IC:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt /Tc_linear_regression_custom_float.c /Fo.\\Release\\_linear_regression_custom_float.obj\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\link.exe /nologo /INCREMENTAL:NO /LTCG /DLL /MANIFEST:EMBED,ID=2 /MANIFESTUAC:NO /LIBPATH:C:\\Python395_x64\\libs /LIBPATH:C:\\Python395_x64\\PCbuild\\amd64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\lib\\um\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\ucrt\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\um\\x64 /EXPORT:PyInit__linear_regression_custom_float .\\Release\\_linear_regression_custom_float.obj /OUT:.\\_linear_regression_custom_float.cp39-win_amd64.pyd /IMPLIB:.\\Release\\_linear_regression_custom_float.cp39-win_amd64.lib\n"]}, {"data": {"text/plain": ["'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a\\\\_linear_regression_custom_float.cp39-win_amd64.pyd'"]}, "execution_count": 31, "metadata": {}, "output_type": "execute_result"}], "source": ["from cffi import FFI\n", "ffibuilder = FFI()\n", "\n", "ffibuilder.cdef(\"int linreg_custom_float(float *, float *);\")\n", "ffibuilder.set_source(\"_linear_regression_custom_float\", code)\n", "ffibuilder.compile(verbose=True)"]}, {"cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": ["from _linear_regression_custom_float.lib import linreg_custom_float"]}, {"cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": ["def predict_clr_custom_float(x):\n", " out = numpy.zeros(1, dtype=numpy.float32)\n", " ptr_x = x.__array_interface__['data'][0]\n", " cptr_x = ffi.cast ( \"float*\" , ptr_x ) \n", " ptr_out = out.__array_interface__['data'][0]\n", " cptr_out = ffi.cast ( \"float*\" , ptr_out ) \n", " linreg_custom_float(cptr_x, cptr_out)\n", " return out"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Avant d'appeler la fonction, on doit transformer le vecteur iniatial en [float32](https://docs.scipy.org/doc/numpy/user/basics.types.html)."]}, {"cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([1.27301276e+31])"]}, "execution_count": 34, "metadata": {}, "output_type": "execute_result"}], "source": ["x32 = x.astype(numpy.float32)\n", "predict_clr_custom(x32)"]}, {"cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 5.12 \u00b5s Ecart-type 1.60 \u00b5s (with 100 runs) in [4.48 \u00b5s, 6.44 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"cffi-linreg-custom-float wrapped\", \"predict_clr_custom(x32)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La diff\u00e9rence n'est pas flagrante. Mesurons le code C uniquement m\u00eame si la partie Python ne peut pas \u00eatre compl\u00e8tement \u00e9vit\u00e9e."]}, {"cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 389.19 ns Ecart-type 226.75 ns (with 100 runs) in [317.00 ns, 577.00 ns]\n"]}], "source": ["out = numpy.zeros(1, dtype=numpy.float32)\n", "ptr_x = x32.__array_interface__['data'][0]\n", "cptr_x = ffi.cast ( \"float*\" , ptr_x ) \n", "ptr_out = out.__array_interface__['data'][0]\n", "cptr_out = ffi.cast ( \"float*\" , ptr_out ) \n", "\n", "memo_time.append(timeexe(\"cffi-linreg-custom-float32\", \"linreg_custom_float(cptr_x, cptr_out)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La diff\u00e9rence n'est pas significative."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### SIMD\n", "\n", "C'est un ensemble d'instructions processeur pour faire des op\u00e9rations terme \u00e0 terme sur 4 float32 aussi rapidement qu'une seule. Le processeur ne peut faire des op\u00e9rations que les nombres sont copi\u00e9s dans ses registres. Le programme passe alors son temps \u00e0 copier des nombres depuis la m\u00e9moire vers les registres du processeur puis \u00e0 faire la copie dans le chemin inverse pour le r\u00e9sultat. Les instructions [SIMD](https://en.wikipedia.org/wiki/SIMD) font gagner du temps du niveau du calcul. Au lieu de faire 4 op\u00e9rations de multiplication terme \u00e0 terme, il n'en fait plus qu'une. Il suffit de savoir comment utiliser ces instructions. Avec Visual Studio, elles sont accessible via ces fonctions [Memory and Initialization Using Streaming SIMD Extensions](https://msdn.microsoft.com/en-us/library/0hey67c0%28v=vs.100%29.aspx).\n", "Le code suivant n'est probablement pas optimal mais il n'est pas trop compliqu\u00e9 \u00e0 suivre."]}, {"cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": ["code = \"\"\"\n", "#include \n", "\n", "static int linreg_custom_float_simd(float * x, float * out)\n", "{\n", " __m128 c1 = _mm_set_ps(0.3034995490664121f, -237.63931533353392f, 510.5306054362245f, 327.7369804093466f);\n", " __m128 c2 = _mm_set_ps(-814.1317093725389f, 492.81458798373245f, 102.84845219168025f, 184.60648905984064f);\n", " __m128 r1 = _mm_set_ss(152.76430691633442f);\n", " r1 = _mm_add_ss(r1, _mm_mul_ps(c1, _mm_load_ps(x)));\n", " r1 = _mm_add_ss(r1, _mm_mul_ps(c2, _mm_load_ps(x+4)));\n", " float r[4];\n", " _mm_store_ps(r, r1); \n", " out[0] = r[0] + r[1] + r[2] + r[3] + 743.5196167505419f * x[8] + 76.095172216624f * x[9];\n", " return 1;\n", "}\n", "\"\"\""]}, {"cell_type": "code", "execution_count": 37, "metadata": {"scrolled": false}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["generating .\\_linear_regression_custom_float_simd.c\n", "(already up-to-date)\n", "the current directory is 'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a'\n", "running build_ext\n", "building '_linear_regression_custom_float_simd' extension\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\cl.exe /c /nologo /Ox /W3 /GL /DNDEBUG /MD -IC:\\Python395_x64\\include -IC:\\Python395_x64\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\include -IC:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\include -IC:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt -IC:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt /Tc_linear_regression_custom_float_simd.c /Fo.\\Release\\_linear_regression_custom_float_simd.obj\n", "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\bin\\HostX86\\x64\\link.exe /nologo /INCREMENTAL:NO /LTCG /DLL /MANIFEST:EMBED,ID=2 /MANIFESTUAC:NO /LIBPATH:C:\\Python395_x64\\libs /LIBPATH:C:\\Python395_x64\\PCbuild\\amd64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\ATLMFC\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.29.30037\\lib\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\lib\\um\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\ucrt\\x64 /LIBPATH:C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\um\\x64 /EXPORT:PyInit__linear_regression_custom_float_simd .\\Release\\_linear_regression_custom_float_simd.obj /OUT:.\\_linear_regression_custom_float_simd.cp39-win_amd64.pyd /IMPLIB:.\\Release\\_linear_regression_custom_float_simd.cp39-win_amd64.lib\n"]}, {"data": {"text/plain": ["'C:\\\\xavierdupre\\\\__home_\\\\GitHub\\\\ensae_teaching_cs\\\\_doc\\\\notebooks\\\\2a\\\\_linear_regression_custom_float_simd.cp39-win_amd64.pyd'"]}, "execution_count": 38, "metadata": {}, "output_type": "execute_result"}], "source": ["from cffi import FFI\n", "ffibuilder = FFI()\n", "\n", "ffibuilder.cdef(\"int linreg_custom_float_simd(float *, float *);\")\n", "ffibuilder.set_source(\"_linear_regression_custom_float_simd\", code)\n", "ffibuilder.compile(verbose=True)"]}, {"cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": ["from _linear_regression_custom_float_simd.lib import linreg_custom_float_simd"]}, {"cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([172.00832], dtype=float32)"]}, "execution_count": 40, "metadata": {}, "output_type": "execute_result"}], "source": ["out = numpy.zeros(1, dtype=numpy.float32)\n", "ptr_x = x32.__array_interface__['data'][0]\n", "cptr_x = ffi.cast ( \"float*\" , ptr_x ) \n", "ptr_out = out.__array_interface__['data'][0]\n", "cptr_out = ffi.cast ( \"float*\" , ptr_out ) \n", "\n", "linreg_custom_float_simd(cptr_x, cptr_out)\n", "out"]}, {"cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 418.99 ns Ecart-type 387.18 ns (with 100 runs) in [299.00 ns, 631.00 ns]\n"]}], "source": ["memo_time.append(timeexe(\"cffi-linreg-custom-float32-simd\", \"linreg_custom_float_simd(cptr_x, cptr_out)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["C'est l\u00e9g\u00e8rement mieux, quelques r\u00e9f\u00e9rences :\n", "\n", "* [aligned_vs_unaligned_load.c](https://gist.github.com/rmcgibbo/7689820) : c'est du code mais facile \u00e0 lire.\n", "* [How to Write Fast Numerical Code](https://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class17.pdf)\n", "\n", "Les processeurs \u00e9voluent au fil du temps, 4 float, 8 float, [SIMD2](https://msdn.microsoft.com/en-us/library/kcwz153a%28v=vs.100%29.aspx), [FMA4 Intrinsics Added for Visual Studio 2010 SP1](https://msdn.microsoft.com/en-us/library/gg445134%28v=vs.100%29.aspx), [AVX](https://software.intel.com/en-us/articles/introduction-to-intel-advanced-vector-extensions/)."]}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["### R\u00e9\u00e9criture purement Python\n", "\n", "On continue avec uniquement du Python sans numpy."]}, {"cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [{"data": {"text/plain": ["[-35.81159277952622,\n", " -267.39308260812277,\n", " 503.56121841083586,\n", " 337.87944183803455,\n", " -577.2725523621144,\n", " 373.6293947654621,\n", " -99.69779326605845,\n", " 78.39842093764699,\n", " 656.5430915289373,\n", " 80.33839980437061]"]}, "execution_count": 42, "metadata": {}, "output_type": "execute_result"}], "source": ["coef = clr.coef_\n", "list(coef)"]}, {"cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [{"data": {"text/plain": ["'152.69613239933642+x[0]*(-35.81159277952622)+x[1]*(-267.39308260812277)+x[2]*(503.56121841083586)+x[3]*(337.87944183803455)+x[4]*(-577.2725523621144)+x[5]*(373.6293947654621)+x[6]*(-99.69779326605845)+x[7]*(78.39842093764699)+x[8]*(656.5430915289373)+x[9]*(80.33839980437061)'"]}, "execution_count": 43, "metadata": {}, "output_type": "execute_result"}], "source": ["code = str(clr.intercept_) + \"+\" + \"+\".join(\"x[{0}]*({1})\".format(i, c) for i, c in enumerate(coef))\n", "code"]}, {"cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [{"data": {"text/plain": ["211.03463170273153"]}, "execution_count": 44, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_python(x):\n", " return 152.764306916+x[0]*0.3034995490664121+x[1]*(-237.63931533353392)+x[2]*510.5306054362245+ \\\n", " x[3]*327.7369804093466+ \\\n", " x[4]*(-814.1317093725389)+x[5]*492.81458798373245+x[6]*102.84845219168025+ \\\n", " x[7]*184.60648905984064+x[8]*743.5196167505419+x[9]*76.095172216624\n", " \n", "predict_clr_python(x[0])"]}, {"cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 2.02 \u00b5s Ecart-type 670.45 ns (with 100 runs) in [1.70 \u00b5s, 2.73 \u00b5s]\n"]}], "source": ["z = list(x[0])\n", "memo_time.append(timeexe(\"python-linreg-custom\", \"predict_clr_python(z)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["De fa\u00e7on assez surprenante, c'est plut\u00f4t rapide. Et si on y mettait une boucle."]}, {"cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [{"data": {"text/plain": ["214.72477744760596"]}, "execution_count": 46, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_python_loop(x, coef, intercept): \n", " return intercept + sum(a*b for a, b in zip(x, coef))\n", " \n", "predict_clr_python_loop(x[0], list(clr.coef_), clr.intercept_)"]}, {"cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 3.54 \u00b5s Ecart-type 1.31 \u00b5s (with 100 runs) in [2.68 \u00b5s, 6.16 \u00b5s]\n"]}], "source": ["coef = list(clr.coef_)\n", "intercept = clr.intercept_\n", "memo_time.append(timeexe(\"python-linreg\", \"predict_clr_python_loop(z, coef, intercept)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["A peine plus long."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### R\u00e9\u00e9criture avec Python et numpy"]}, {"cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [{"data": {"text/plain": ["214.72477744760596"]}, "execution_count": 48, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_numpy(x, coef, intercept): \n", " return intercept + numpy.dot(coef, x).sum()\n", " \n", "predict_clr_numpy(x[0], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 8.08 \u00b5s Ecart-type 3.44 \u00b5s (with 100 runs) in [6.44 \u00b5s, 12.16 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"numpy-linreg-numpy\", \"predict_clr_numpy(z, coef, clr.intercept_)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Les dimensions des tableaux sont trop petites pour que le calcul matriciel apporte une diff\u00e9rence. On se retrouve dans le cas *cffi* o\u00f9 les \u00e9changes Python - C grignotent tout le temps de calcul."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### numba\n", "\n", "[numba](http://numba.pydata.org/) essaye de compiler \u00e0 la vol\u00e9e des bouts de codes \u00e9crits en Python. On induque quelle fonction optimiser en faisant pr\u00e9c\u00e9der la fonction de ``@jit``. Toutes les \u00e9critures ne fonctionnent, typiquement, certaines listes en compr\u00e9hension soul\u00e8vent une exception. Il faut donc \u00e9crire son code en Python d'une fa\u00e7on assez proche de ce qu'il serait en C."]}, {"cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": ["from numba import jit"]}, {"cell_type": "code", "execution_count": 50, "metadata": {"scrolled": false}, "outputs": [{"name": "stderr", "output_type": "stream", "text": ["C:\\Python395_x64\\lib\\site-packages\\numba\\core\\ir_utils.py:2152: NumbaPendingDeprecationWarning: \u001b[1m\n", "Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'x' of function 'predict_clr_numba'.\n", "\n", "For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types\n", "\u001b[1m\n", "File \"\", line 2:\u001b[0m\n", "\u001b[1m@jit\n", "\u001b[1mdef predict_clr_numba(x, coef, intercept):\n", "\u001b[0m\u001b[1m^\u001b[0m\u001b[0m\n", "\u001b[0m\n", " warnings.warn(NumbaPendingDeprecationWarning(msg, loc=loc))\n"]}, {"data": {"text/plain": ["214.724777447606"]}, "execution_count": 51, "metadata": {}, "output_type": "execute_result"}], "source": ["@jit\n", "def predict_clr_numba(x, coef, intercept):\n", " s = intercept\n", " for i in range(0, len(x)):\n", " s += coef[i] * x[i]\n", " return s\n", " \n", "predict_clr_numba(z, clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 23.77 \u00b5s Ecart-type 7.36 \u00b5s (with 100 runs) in [19.99 \u00b5s, 37.64 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"numba-linreg-notype\", \"predict_clr_numba(z, clr.coef_, clr.intercept_)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Plut\u00f4t rapide !"]}, {"cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [{"data": {"text/plain": ["214.724777447606"]}, "execution_count": 53, "metadata": {}, "output_type": "execute_result"}], "source": ["@jit('double(double[:], double[:], double)')\n", "def predict_clr_numba_cast(x, coef, intercept):\n", " s = intercept\n", " for i in range(0, len(x)):\n", " s += coef[i] * x[i]\n", " return s\n", " \n", "# La fonction ne fonctionne qu'avec un numpy.array car le langage C est fortement typ\u00e9.\n", "predict_clr_numba_cast(x[0], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 948.20 ns Ecart-type 411.47 ns (with 100 runs) in [759.00 ns, 1.68 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"numba-linreg-type\", \"predict_clr_numba_cast(x[0], clr.coef_, clr.intercept_)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On voit que plus on donne d'information au compilateur, plus il est capable d'optimiser."]}, {"cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [{"data": {"text/plain": ["214.7247772216797"]}, "execution_count": 55, "metadata": {}, "output_type": "execute_result"}], "source": ["@jit('float32(float32[:], float32[:], float32)')\n", "def predict_clr_numba_cast_float(x, coef, intercept):\n", " s = intercept\n", " for i in range(0, len(x)):\n", " s += coef[i] * x[i]\n", " return s\n", " \n", "# La fonction ne fonctionne qu'avec un numpy.array car le langage C est fortement typ\u00e9.\n", "x32 = x[0].astype(numpy.float32)\n", "c32 = clr.coef_.astype(numpy.float32)\n", "i32 = numpy.float32(clr.intercept_)\n", "predict_clr_numba_cast_float(x32, c32, i32)"]}, {"cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 707.08 ns Ecart-type 268.64 ns (with 100 runs) in [565.00 ns, 1.25 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"numba-linreg-type-float32\", \"predict_clr_numba_cast_float(x32, c32, i32)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On essaye avec les coefficients dans la fonction."]}, {"cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [{"data": {"text/plain": ["211.034631692416"]}, "execution_count": 57, "metadata": {}, "output_type": "execute_result"}], "source": ["@jit('double(double[:])')\n", "def predict_clr_numba_cast_custom(x):\n", " coef = [ 3.03499549e-01, -2.37639315e+02, 5.10530605e+02, 3.27736980e+02,\n", " -8.14131709e+02, 4.92814588e+02, 1.02848452e+02, 1.84606489e+02,\n", " 7.43519617e+02, 7.60951722e+01]\n", " s = 152.76430691633442\n", " for i in range(0, len(x)):\n", " s += coef[i] * x[i]\n", " return s\n", " \n", "predict_clr_numba_cast_custom(x[0])"]}, {"cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 824.35 ns Ecart-type 371.36 ns (with 100 runs) in [652.00 ns, 1.56 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"numba-linreg-type-custom\", \"predict_clr_numba_cast_custom(x[0])\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On se rapproche des temps obtenus avec *cffi* sans *wrapping*, cela signifie que *numba* fait un bien meilleur travail \u00e0 ce niveau que le wrapper rapidement cr\u00e9\u00e9."]}, {"cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [{"name": "stderr", "output_type": "stream", "text": [":1: NumbaWarning: \u001b[1m\n", "Compilation is falling back to object mode WITH looplifting enabled because Function \"predict_clr_numba_numpy\" failed type inference due to: \u001b[1m\u001b[1mUnknown attribute 'sum' of type float64\n", "\u001b[1m\n", "File \"\", line 3:\u001b[0m\n", "\u001b[1mdef predict_clr_numba_numpy(x, coef, intercept): \n", "\u001b[1m return intercept + numpy.dot(coef, x).sum()\n", "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", "\u001b[0m\n", "\u001b[0m\u001b[1mDuring: typing of get attribute at (3)\u001b[0m\n", "\u001b[1m\n", "File \"\", line 3:\u001b[0m\n", "\u001b[1mdef predict_clr_numba_numpy(x, coef, intercept): \n", "\u001b[1m return intercept + numpy.dot(coef, x).sum()\n", "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", "\u001b[0m\n", " @jit('double(double[:], double[:], double)')\n", "C:\\Python395_x64\\lib\\site-packages\\numba\\core\\object_mode_passes.py:151: NumbaWarning: \u001b[1mFunction \"predict_clr_numba_numpy\" was compiled in object mode without forceobj=True.\n", "\u001b[1m\n", "File \"\", line 2:\u001b[0m\n", "\u001b[1m@jit('double(double[:], double[:], double)')\n", "\u001b[1mdef predict_clr_numba_numpy(x, coef, intercept): \n", "\u001b[0m\u001b[1m^\u001b[0m\u001b[0m\n", "\u001b[0m\n", " warnings.warn(errors.NumbaWarning(warn_msg,\n", "C:\\Python395_x64\\lib\\site-packages\\numba\\core\\object_mode_passes.py:161: NumbaDeprecationWarning: \u001b[1m\n", "Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n", "\n", "For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit\n", "\u001b[1m\n", "File \"\", line 2:\u001b[0m\n", "\u001b[1m@jit('double(double[:], double[:], double)')\n", "\u001b[1mdef predict_clr_numba_numpy(x, coef, intercept): \n", "\u001b[0m\u001b[1m^\u001b[0m\u001b[0m\n", "\u001b[0m\n", " warnings.warn(errors.NumbaDeprecationWarning(msg,\n"]}, {"data": {"text/plain": ["214.72477744760596"]}, "execution_count": 59, "metadata": {}, "output_type": "execute_result"}], "source": ["@jit('double(double[:], double[:], double)')\n", "def predict_clr_numba_numpy(x, coef, intercept): \n", " return intercept + numpy.dot(coef, x).sum()\n", "\n", "predict_clr_numba_numpy(x[0], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 5.15 \u00b5s Ecart-type 1.78 \u00b5s (with 100 runs) in [4.37 \u00b5s, 6.00 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"numba-linreg-type-numpy\", \"predict_clr_numba_numpy(x[0], clr.coef_, clr.intercept_)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["*numba* est moins performant quand *numpy* est impliqu\u00e9 car le code de numpy n'est pas r\u00e9\u00e9crit, il est appel\u00e9."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### cython\n", "\n", "[cython](http://cython.org/) permet de cr\u00e9er des extensions C de plus grande envergure que *numba*. C'est l'option choisie par [scikit-learn](http://scikit-learn.org/stable/). Il vaut mieux conna\u00eetre le C pour s'en servir et l\u00e0 encore, l'objectif est de r\u00e9duire les \u00e9changes Python / C qui co\u00fbtent cher."]}, {"cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": ["%load_ext cython"]}, {"cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": ["%%cython\n", "def predict_clr_cython(x, coef, intercept):\n", " s = intercept\n", " for i in range(0, len(x)):\n", " s += coef[i] * x[i]\n", " return s"]}, {"cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [{"data": {"text/plain": ["214.724777447606"]}, "execution_count": 63, "metadata": {}, "output_type": "execute_result"}], "source": ["predict_clr_cython(x[0], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 2.71 \u00b5s Ecart-type 1.60 \u00b5s (with 100 runs) in [1.92 \u00b5s, 7.19 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\"cython-linreg\", \"predict_clr_cython(x[0], clr.coef_, clr.intercept_)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Cython fait moins bien que *numba* dans notre cas et l'optimisation propos\u00e9e est assez proche du temps d\u00e9j\u00e0 obtenue avec le langage Python seul. Cela est d\u00fb au fait que la plupart des objets tels que du code associ\u00e9 aux listes ou aux dictionnaires ont \u00e9t\u00e9 r\u00e9\u00e9crits en C."]}, {"cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": ["%%cython\n", "cimport numpy as npc\n", "\n", "def predict_clr_cython_type(npc.ndarray[double, ndim=1, mode='c'] x,\n", " npc.ndarray[double, ndim=1, mode='c'] coef,\n", " double intercept):\n", " cdef double s = intercept\n", " for i in range(0, x.shape[0]):\n", " s += coef[i] * x[i]\n", " return s"]}, {"cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [{"data": {"text/plain": ["214.724777447606"]}, "execution_count": 66, "metadata": {}, "output_type": "execute_result"}], "source": ["predict_clr_cython_type(x[0], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 721.31 ns Ecart-type 399.10 ns (with 100 runs) in [533.00 ns, 1.44 \u00b5s]\n"]}], "source": ["memo_time.append(timeexe(\n", " \"cython-linreg-type\", \"predict_clr_cython_type(x[0], clr.coef_, clr.intercept_)\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Le temps est quasi identique avec un \u00e9cart type moins grand de fa\u00e7on significative."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Une derni\u00e8re option : ONNX\n", "\n", "[ONNX](https://onnx.ai/) est un format de s\u00e9rialisation qui permet de d\u00e9crire un mod\u00e8le de mod\u00e8le de machine learning ou de deep learning. Cela permet de dissocer le mod\u00e8le de la librairie qui a servi \u00e0 le produire (voir [ML.net and ONNX](http://www.xavierdupre.fr/app/machinelearningext/helpsphinx/aonnx.html))."]}, {"cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [{"name": "stderr", "output_type": "stream", "text": ["Error in sys.excepthook:\n", "Traceback (most recent call last):\n", " File \"C:\\Python395_x64\\lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 1934, in showtraceback\n", " stb = value._render_traceback_()\n", "AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", " File \"C:\\Python395_x64\\lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 1936, in showtraceback\n", " stb = self.InteractiveTB.structured_traceback(etype,\n", " File \"C:\\Python395_x64\\lib\\site-packages\\IPython\\core\\ultratb.py\", line 1105, in structured_traceback\n", " return FormattedTB.structured_traceback(\n", " File \"C:\\Python395_x64\\lib\\site-packages\\IPython\\core\\ultratb.py\", line 999, in structured_traceback\n", " return VerboseTB.structured_traceback(\n", " File \"C:\\Python395_x64\\lib\\site-packages\\IPython\\core\\ultratb.py\", line 851, in structured_traceback\n", " assert etb is not None\n", "AssertionError\n", "\n", "Original exception was:\n", "RuntimeError: module compiled against API version 0xf but this version of numpy is 0xe\n"]}, {"name": "stdout", "output_type": "stream", "text": ["onnx, skl2onnx, onnxruntime sont disponibles.\n"]}], "source": ["try:\n", " from skl2onnx import convert_sklearn\n", " from skl2onnx.common.data_types import FloatTensorType\n", " import onnxruntime\n", " import onnx\n", " ok_onnx = True\n", " print(\"onnx, skl2onnx, onnxruntime sont disponibles.\")\n", " \n", " def save_model(onnx_model, filename):\n", " with open(filename, \"wb\") as f:\n", " f.write(onnx_model.SerializeToString())\n", "except ImportError as e:\n", " print(\"La suite requiert onnx, skl2onnx et onnxruntime.\")\n", " print(e)\n", " ok_onnx = False"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On convertit le mod\u00e8le au format [ONNX](https://onnx.ai/)."]}, {"cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Mod\u00e8le s\u00e9rialis\u00e9 au format ONNX\n", "ir_version: 6\n", "producer_name: \"skl2onnx\"\n", "producer_version: \"1.10.4\"\n", "domain: \"ai.onnx\"\n", "model_version: 0\n", "doc_string: \"\"\n", "graph {\n", " node {\n", " input: \"input\"\n", " output: \"variable\"\n", " name: \"LinearRegressor\"\n", " op_type: \"LinearRegressor\"\n", " attribute {\n", " name: \"coefficients\"\n", " floats: -35.81159210205078\n", " floats: -267.3930969238281\n", " floats: 503.56121826171875\n", " floats: 337.87945556640625\n", " floats: -577.2725219726562\n", " floats: 373.62939453125\n", " floats: -99.69779205322266\n", " floats: 78.39842224121094\n", " floats: 656.5430908203125\n", " floats: 80.3384017944336\n", " type: FLOATS\n", " }\n", " attribute {\n", " name: \"intercepts\"\n", " floats: 152.69613647460938\n", " type: FLOATS\n", " }\n", " domain: \"ai.onnx.ml\"\n", " }\n", " name: \"model\"\n", " input {\n", " name: \"input\"\n", " type {\n", " tensor_type {\n", " elem_type: 1\n", " shape {\n", " dim {\n", " }\n", " dim {\n", " dim_value: 10\n", " }\n", " }\n", " }\n", " }\n", " }\n", " output {\n", " name: \"variable\"\n", " type {\n", " tensor_type {\n", " elem_type: 1\n", " shape {\n", " dim {\n", " }\n", " dim {\n", " dim_value: 1\n", " }\n", " }\n", " }\n", " }\n", " }\n", "}\n", "opset_import {\n", " domain: \"ai.onnx.ml\"\n", " version: 1\n", "}\n", "opset_import {\n", " domain: \"\"\n", " version: 11\n", "}\n", "\n"]}], "source": ["if ok_onnx:\n", " onnx_model = convert_sklearn(\n", " clr, 'model', [('input', FloatTensorType([None, clr.coef_.shape[0]]))],\n", " target_opset=11)\n", " onnx_model.ir_version = 6\n", " save_model(onnx_model, 'model.onnx')\n", " \n", " model_onnx = onnx.load('model.onnx')\n", " print(\"Mod\u00e8le s\u00e9rialis\u00e9 au format ONNX\")\n", " print(model_onnx)\n", "else:\n", " print(\"onnx, onnxmltools, onnxruntime sont disponibles.\")"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On calcule les pr\u00e9dictions. Le module {onnxruntime](https://docs.microsoft.com/en-us/python/api/overview/azure/onnx/intro?view=azure-onnx-py) optimise les calculs pour des mod\u00e8les de deep learning. Cela explique pourquoi tous les calculs sont r\u00e9alis\u00e9s avec des r\u00e9els repr\u00e9sent\u00e9s sur 4 octets [numpy.float32](https://docs.scipy.org/doc/numpy/user/basics.types.html?highlight=float32)."]}, {"cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Input: NodeArg(name='input', type='tensor(float)', shape=[None, 10])\n", "Output: NodeArg(name='variable', type='tensor(float)', shape=[None, 1])\n", "Prediction: [array([[214.72478]], dtype=float32)]\n"]}], "source": ["if ok_onnx:\n", " sess = onnxruntime.InferenceSession(\"model.onnx\")\n", " for i in sess.get_inputs():\n", " print('Input:', i)\n", " for o in sess.get_outputs():\n", " print('Output:', o)\n", " \n", " def predict_onnxrt(x): \n", " return sess.run([\"variable\"], {'input': x})\n", " \n", " print(\"Prediction:\", predict_onnxrt(x.astype(numpy.float32)))"]}, {"cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 13.00 \u00b5s Ecart-type 7.69 \u00b5s (with 100 runs) in [9.71 \u00b5s, 23.64 \u00b5s]\n", "Moyenne: 12.69 \u00b5s Ecart-type 1.93 \u00b5s (with 100 runs) in [11.29 \u00b5s, 16.23 \u00b5s]\n"]}], "source": ["if ok_onnx:\n", " x32 = x.astype(numpy.float32)\n", " memo_time.append(timeexe(\"onnxruntime-float32\", \"predict_onnxrt(x32)\"))\n", " memo_time.append(timeexe(\"onnxruntime-float64\", \"predict_onnxrt(x.astype(numpy.float32))\"))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### R\u00e9capitulatif"]}, {"cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
averagedeviationfirstfirst3last3repeatmin5max5coderun
legend
cffi-linreg-custom-float323.891910e-072.267541e-075.608000e-062.196000e-063.763333e-0710003.170000e-075.770000e-07linreg_custom_float(cptr_x, cptr_out)100
cffi-linreg-custom-float32-simd4.189890e-073.871792e-071.091200e-053.949667e-062.996667e-0710002.990000e-076.310000e-07linreg_custom_float_simd(cptr_x, cptr_out)100
cffi-linreg-custom4.665150e-078.519581e-072.679900e-059.352667e-063.256667e-0710003.150000e-077.150000e-07linreg_custom(cptr_x, cptr_out)100
numba-linreg-type-float327.070790e-072.686359e-071.162000e-061.083333e-065.663333e-0710005.650000e-071.249000e-06predict_clr_numba_cast_float(x32, c32, i32)100
cython-linreg-type7.213150e-073.991047e-071.252000e-068.300000e-075.513333e-0710005.330000e-071.443000e-06predict_clr_cython_type(x[0], clr.coef_, clr.i...100
numba-linreg-type-custom8.243540e-073.713608e-077.940000e-077.156667e-076.543333e-0710006.520000e-071.558000e-06predict_clr_numba_cast_custom(x[0])100
cffi-linreg8.313670e-077.080831e-076.414000e-063.244000e-064.170000e-0710004.160000e-071.519000e-06lib.linreg(n, cptr_x, cptr_coef, clr.intercept...100
numba-linreg-type9.482040e-074.114651e-079.350000e-078.663333e-077.596667e-0710007.590000e-071.678000e-06predict_clr_numba_cast(x[0], clr.coef_, clr.in...100
python-linreg-custom2.018942e-066.704544e-075.511000e-064.254667e-061.703667e-0610001.696000e-062.731000e-06predict_clr_python(z)100
cython-linreg2.706254e-061.597806e-065.083000e-065.419333e-062.126000e-0610001.920000e-067.194000e-06predict_clr_cython(x[0], clr.coef_, clr.interc...100
python-linreg3.539523e-061.306156e-068.761000e-067.510000e-062.779667e-0610002.681000e-066.164000e-06predict_clr_python_loop(z, coef, intercept)100
cffi-linreg-custom-float wrapped5.123886e-061.598363e-061.200400e-051.176767e-054.483000e-0610004.477000e-066.436000e-06predict_clr_custom(x32)100
numba-linreg-type-numpy5.147404e-061.775723e-061.874100e-051.572433e-054.474333e-0610004.374000e-065.996000e-06predict_clr_numba_numpy(x[0], clr.coef_, clr.i...100
cffi-linreg-custom wrapped5.274568e-061.823247e-062.166200e-052.268700e-055.626667e-0610004.422000e-067.773000e-06predict_clr_custom(x)100
cffi-linreg-wrapped7.519599e-062.343424e-061.580000e-052.028933e-056.263333e-0610006.201000e-061.041900e-05predict_clr(x, clr)100
numpy-linreg-numpy8.081947e-063.442724e-063.679000e-053.075167e-056.525667e-0610006.442000e-061.216200e-05predict_clr_numpy(z, coef, clr.intercept_)100
onnxruntime-float641.269215e-051.926911e-061.742200e-051.337233e-051.133667e-0510001.129500e-051.623200e-05predict_onnxrt(x.astype(numpy.float32))100
onnxruntime-float321.299773e-057.686900e-062.281400e-051.689933e-051.009533e-0510009.713000e-062.363700e-05predict_onnxrt(x32)100
numba-linreg-notype2.376539e-057.362380e-063.079800e-052.445400e-053.723367e-0510001.998900e-053.763900e-05predict_clr_numba(z, clr.coef_, clr.intercept_)100
sklearn.predict4.550096e-056.337585e-067.724200e-056.447133e-054.143867e-0510004.087300e-055.295400e-05clr.predict(z)100
\n", "
"], "text/plain": [" average deviation first \\\n", "legend \n", "cffi-linreg-custom-float32 3.891910e-07 2.267541e-07 5.608000e-06 \n", "cffi-linreg-custom-float32-simd 4.189890e-07 3.871792e-07 1.091200e-05 \n", "cffi-linreg-custom 4.665150e-07 8.519581e-07 2.679900e-05 \n", "numba-linreg-type-float32 7.070790e-07 2.686359e-07 1.162000e-06 \n", "cython-linreg-type 7.213150e-07 3.991047e-07 1.252000e-06 \n", "numba-linreg-type-custom 8.243540e-07 3.713608e-07 7.940000e-07 \n", "cffi-linreg 8.313670e-07 7.080831e-07 6.414000e-06 \n", "numba-linreg-type 9.482040e-07 4.114651e-07 9.350000e-07 \n", "python-linreg-custom 2.018942e-06 6.704544e-07 5.511000e-06 \n", "cython-linreg 2.706254e-06 1.597806e-06 5.083000e-06 \n", "python-linreg 3.539523e-06 1.306156e-06 8.761000e-06 \n", "cffi-linreg-custom-float wrapped 5.123886e-06 1.598363e-06 1.200400e-05 \n", "numba-linreg-type-numpy 5.147404e-06 1.775723e-06 1.874100e-05 \n", "cffi-linreg-custom wrapped 5.274568e-06 1.823247e-06 2.166200e-05 \n", "cffi-linreg-wrapped 7.519599e-06 2.343424e-06 1.580000e-05 \n", "numpy-linreg-numpy 8.081947e-06 3.442724e-06 3.679000e-05 \n", "onnxruntime-float64 1.269215e-05 1.926911e-06 1.742200e-05 \n", "onnxruntime-float32 1.299773e-05 7.686900e-06 2.281400e-05 \n", "numba-linreg-notype 2.376539e-05 7.362380e-06 3.079800e-05 \n", "sklearn.predict 4.550096e-05 6.337585e-06 7.724200e-05 \n", "\n", " first3 last3 repeat \\\n", "legend \n", "cffi-linreg-custom-float32 2.196000e-06 3.763333e-07 1000 \n", "cffi-linreg-custom-float32-simd 3.949667e-06 2.996667e-07 1000 \n", "cffi-linreg-custom 9.352667e-06 3.256667e-07 1000 \n", "numba-linreg-type-float32 1.083333e-06 5.663333e-07 1000 \n", "cython-linreg-type 8.300000e-07 5.513333e-07 1000 \n", "numba-linreg-type-custom 7.156667e-07 6.543333e-07 1000 \n", "cffi-linreg 3.244000e-06 4.170000e-07 1000 \n", "numba-linreg-type 8.663333e-07 7.596667e-07 1000 \n", "python-linreg-custom 4.254667e-06 1.703667e-06 1000 \n", "cython-linreg 5.419333e-06 2.126000e-06 1000 \n", "python-linreg 7.510000e-06 2.779667e-06 1000 \n", "cffi-linreg-custom-float wrapped 1.176767e-05 4.483000e-06 1000 \n", "numba-linreg-type-numpy 1.572433e-05 4.474333e-06 1000 \n", "cffi-linreg-custom wrapped 2.268700e-05 5.626667e-06 1000 \n", "cffi-linreg-wrapped 2.028933e-05 6.263333e-06 1000 \n", "numpy-linreg-numpy 3.075167e-05 6.525667e-06 1000 \n", "onnxruntime-float64 1.337233e-05 1.133667e-05 1000 \n", "onnxruntime-float32 1.689933e-05 1.009533e-05 1000 \n", "numba-linreg-notype 2.445400e-05 3.723367e-05 1000 \n", "sklearn.predict 6.447133e-05 4.143867e-05 1000 \n", "\n", " min5 max5 \\\n", "legend \n", "cffi-linreg-custom-float32 3.170000e-07 5.770000e-07 \n", "cffi-linreg-custom-float32-simd 2.990000e-07 6.310000e-07 \n", "cffi-linreg-custom 3.150000e-07 7.150000e-07 \n", "numba-linreg-type-float32 5.650000e-07 1.249000e-06 \n", "cython-linreg-type 5.330000e-07 1.443000e-06 \n", "numba-linreg-type-custom 6.520000e-07 1.558000e-06 \n", "cffi-linreg 4.160000e-07 1.519000e-06 \n", "numba-linreg-type 7.590000e-07 1.678000e-06 \n", "python-linreg-custom 1.696000e-06 2.731000e-06 \n", "cython-linreg 1.920000e-06 7.194000e-06 \n", "python-linreg 2.681000e-06 6.164000e-06 \n", "cffi-linreg-custom-float wrapped 4.477000e-06 6.436000e-06 \n", "numba-linreg-type-numpy 4.374000e-06 5.996000e-06 \n", "cffi-linreg-custom wrapped 4.422000e-06 7.773000e-06 \n", "cffi-linreg-wrapped 6.201000e-06 1.041900e-05 \n", "numpy-linreg-numpy 6.442000e-06 1.216200e-05 \n", "onnxruntime-float64 1.129500e-05 1.623200e-05 \n", "onnxruntime-float32 9.713000e-06 2.363700e-05 \n", "numba-linreg-notype 1.998900e-05 3.763900e-05 \n", "sklearn.predict 4.087300e-05 5.295400e-05 \n", "\n", " code \\\n", "legend \n", "cffi-linreg-custom-float32 linreg_custom_float(cptr_x, cptr_out) \n", "cffi-linreg-custom-float32-simd linreg_custom_float_simd(cptr_x, cptr_out) \n", "cffi-linreg-custom linreg_custom(cptr_x, cptr_out) \n", "numba-linreg-type-float32 predict_clr_numba_cast_float(x32, c32, i32) \n", "cython-linreg-type predict_clr_cython_type(x[0], clr.coef_, clr.i... \n", "numba-linreg-type-custom predict_clr_numba_cast_custom(x[0]) \n", "cffi-linreg lib.linreg(n, cptr_x, cptr_coef, clr.intercept... \n", "numba-linreg-type predict_clr_numba_cast(x[0], clr.coef_, clr.in... \n", "python-linreg-custom predict_clr_python(z) \n", "cython-linreg predict_clr_cython(x[0], clr.coef_, clr.interc... \n", "python-linreg predict_clr_python_loop(z, coef, intercept) \n", "cffi-linreg-custom-float wrapped predict_clr_custom(x32) \n", "numba-linreg-type-numpy predict_clr_numba_numpy(x[0], clr.coef_, clr.i... \n", "cffi-linreg-custom wrapped predict_clr_custom(x) \n", "cffi-linreg-wrapped predict_clr(x, clr) \n", "numpy-linreg-numpy predict_clr_numpy(z, coef, clr.intercept_) \n", "onnxruntime-float64 predict_onnxrt(x.astype(numpy.float32)) \n", "onnxruntime-float32 predict_onnxrt(x32) \n", "numba-linreg-notype predict_clr_numba(z, clr.coef_, clr.intercept_) \n", "sklearn.predict clr.predict(z) \n", "\n", " run \n", "legend \n", "cffi-linreg-custom-float32 100 \n", "cffi-linreg-custom-float32-simd 100 \n", "cffi-linreg-custom 100 \n", "numba-linreg-type-float32 100 \n", "cython-linreg-type 100 \n", "numba-linreg-type-custom 100 \n", "cffi-linreg 100 \n", "numba-linreg-type 100 \n", "python-linreg-custom 100 \n", "cython-linreg 100 \n", "python-linreg 100 \n", "cffi-linreg-custom-float wrapped 100 \n", "numba-linreg-type-numpy 100 \n", "cffi-linreg-custom wrapped 100 \n", "cffi-linreg-wrapped 100 \n", "numpy-linreg-numpy 100 \n", "onnxruntime-float64 100 \n", "onnxruntime-float32 100 \n", "numba-linreg-notype 100 \n", "sklearn.predict 100 "]}, "execution_count": 72, "metadata": {}, "output_type": "execute_result"}], "source": ["import pandas\n", "df = pandas.DataFrame(data=memo_time)\n", "df = df.set_index(\"legend\").sort_values(\"average\")\n", "df"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On enl\u00e8ve quelques colonnes et on rappelle :\n", "\n", "* **cffi**: signifie optimis\u00e9 avec cffi\n", "* **custom**: pas de boucle mais la fonction ne peut pr\u00e9dire qu'une seule r\u00e9gression lin\u00e9aire\n", "* **float32**: utilise des float et non des double\n", "* **linreg**: r\u00e9gression lin\u00e9aire\n", "* **numba**: optimisation avec numba\n", "* **numpy**: optimisation avec numpy\n", "* **python**: pas de C, que du python\n", "* **simd**: optimis\u00e9 avec les instructions SIMD\n", "* **sklearn**: fonction sklearn.predict\n", "* **static**: la fonction utilise des variables statiques\n", "* **type**: la fonction est typ\u00e9e et ne fonctionne qu'avec un type pr\u00e9cis en entr\u00e9e.\n", "* **wrapped**: code optimis\u00e9 mais embabll\u00e9 dans une fonction Python qui elle ne l'est pas (les containers sont recr\u00e9\u00e9s \u00e0 chaque fois)"]}, {"cell_type": "code", "execution_count": 72, "metadata": {"scrolled": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
averagedeviationmin5max5runcode
legend
cffi-linreg-custom-float323.891910e-072.267541e-073.170000e-075.770000e-07100linreg_custom_float(cptr_x, cptr_out)
cffi-linreg-custom-float32-simd4.189890e-073.871792e-072.990000e-076.310000e-07100linreg_custom_float_simd(cptr_x, cptr_out)
cffi-linreg-custom4.665150e-078.519581e-073.150000e-077.150000e-07100linreg_custom(cptr_x, cptr_out)
numba-linreg-type-float327.070790e-072.686359e-075.650000e-071.249000e-06100predict_clr_numba_cast_float(x32, c32, i32)
cython-linreg-type7.213150e-073.991047e-075.330000e-071.443000e-06100predict_clr_cython_type(x[0], clr.coef_, clr.i...
numba-linreg-type-custom8.243540e-073.713608e-076.520000e-071.558000e-06100predict_clr_numba_cast_custom(x[0])
cffi-linreg8.313670e-077.080831e-074.160000e-071.519000e-06100lib.linreg(n, cptr_x, cptr_coef, clr.intercept...
numba-linreg-type9.482040e-074.114651e-077.590000e-071.678000e-06100predict_clr_numba_cast(x[0], clr.coef_, clr.in...
python-linreg-custom2.018942e-066.704544e-071.696000e-062.731000e-06100predict_clr_python(z)
cython-linreg2.706254e-061.597806e-061.920000e-067.194000e-06100predict_clr_cython(x[0], clr.coef_, clr.interc...
python-linreg3.539523e-061.306156e-062.681000e-066.164000e-06100predict_clr_python_loop(z, coef, intercept)
cffi-linreg-custom-float wrapped5.123886e-061.598363e-064.477000e-066.436000e-06100predict_clr_custom(x32)
numba-linreg-type-numpy5.147404e-061.775723e-064.374000e-065.996000e-06100predict_clr_numba_numpy(x[0], clr.coef_, clr.i...
cffi-linreg-custom wrapped5.274568e-061.823247e-064.422000e-067.773000e-06100predict_clr_custom(x)
cffi-linreg-wrapped7.519599e-062.343424e-066.201000e-061.041900e-05100predict_clr(x, clr)
numpy-linreg-numpy8.081947e-063.442724e-066.442000e-061.216200e-05100predict_clr_numpy(z, coef, clr.intercept_)
onnxruntime-float641.269215e-051.926911e-061.129500e-051.623200e-05100predict_onnxrt(x.astype(numpy.float32))
onnxruntime-float321.299773e-057.686900e-069.713000e-062.363700e-05100predict_onnxrt(x32)
numba-linreg-notype2.376539e-057.362380e-061.998900e-053.763900e-05100predict_clr_numba(z, clr.coef_, clr.intercept_)
sklearn.predict4.550096e-056.337585e-064.087300e-055.295400e-05100clr.predict(z)
\n", "
"], "text/plain": [" average deviation min5 \\\n", "legend \n", "cffi-linreg-custom-float32 3.891910e-07 2.267541e-07 3.170000e-07 \n", "cffi-linreg-custom-float32-simd 4.189890e-07 3.871792e-07 2.990000e-07 \n", "cffi-linreg-custom 4.665150e-07 8.519581e-07 3.150000e-07 \n", "numba-linreg-type-float32 7.070790e-07 2.686359e-07 5.650000e-07 \n", "cython-linreg-type 7.213150e-07 3.991047e-07 5.330000e-07 \n", "numba-linreg-type-custom 8.243540e-07 3.713608e-07 6.520000e-07 \n", "cffi-linreg 8.313670e-07 7.080831e-07 4.160000e-07 \n", "numba-linreg-type 9.482040e-07 4.114651e-07 7.590000e-07 \n", "python-linreg-custom 2.018942e-06 6.704544e-07 1.696000e-06 \n", "cython-linreg 2.706254e-06 1.597806e-06 1.920000e-06 \n", "python-linreg 3.539523e-06 1.306156e-06 2.681000e-06 \n", "cffi-linreg-custom-float wrapped 5.123886e-06 1.598363e-06 4.477000e-06 \n", "numba-linreg-type-numpy 5.147404e-06 1.775723e-06 4.374000e-06 \n", "cffi-linreg-custom wrapped 5.274568e-06 1.823247e-06 4.422000e-06 \n", "cffi-linreg-wrapped 7.519599e-06 2.343424e-06 6.201000e-06 \n", "numpy-linreg-numpy 8.081947e-06 3.442724e-06 6.442000e-06 \n", "onnxruntime-float64 1.269215e-05 1.926911e-06 1.129500e-05 \n", "onnxruntime-float32 1.299773e-05 7.686900e-06 9.713000e-06 \n", "numba-linreg-notype 2.376539e-05 7.362380e-06 1.998900e-05 \n", "sklearn.predict 4.550096e-05 6.337585e-06 4.087300e-05 \n", "\n", " max5 run \\\n", "legend \n", "cffi-linreg-custom-float32 5.770000e-07 100 \n", "cffi-linreg-custom-float32-simd 6.310000e-07 100 \n", "cffi-linreg-custom 7.150000e-07 100 \n", "numba-linreg-type-float32 1.249000e-06 100 \n", "cython-linreg-type 1.443000e-06 100 \n", "numba-linreg-type-custom 1.558000e-06 100 \n", "cffi-linreg 1.519000e-06 100 \n", "numba-linreg-type 1.678000e-06 100 \n", "python-linreg-custom 2.731000e-06 100 \n", "cython-linreg 7.194000e-06 100 \n", "python-linreg 6.164000e-06 100 \n", "cffi-linreg-custom-float wrapped 6.436000e-06 100 \n", "numba-linreg-type-numpy 5.996000e-06 100 \n", "cffi-linreg-custom wrapped 7.773000e-06 100 \n", "cffi-linreg-wrapped 1.041900e-05 100 \n", "numpy-linreg-numpy 1.216200e-05 100 \n", "onnxruntime-float64 1.623200e-05 100 \n", "onnxruntime-float32 2.363700e-05 100 \n", "numba-linreg-notype 3.763900e-05 100 \n", "sklearn.predict 5.295400e-05 100 \n", "\n", " code \n", "legend \n", "cffi-linreg-custom-float32 linreg_custom_float(cptr_x, cptr_out) \n", "cffi-linreg-custom-float32-simd linreg_custom_float_simd(cptr_x, cptr_out) \n", "cffi-linreg-custom linreg_custom(cptr_x, cptr_out) \n", "numba-linreg-type-float32 predict_clr_numba_cast_float(x32, c32, i32) \n", "cython-linreg-type predict_clr_cython_type(x[0], clr.coef_, clr.i... \n", "numba-linreg-type-custom predict_clr_numba_cast_custom(x[0]) \n", "cffi-linreg lib.linreg(n, cptr_x, cptr_coef, clr.intercept... \n", "numba-linreg-type predict_clr_numba_cast(x[0], clr.coef_, clr.in... \n", "python-linreg-custom predict_clr_python(z) \n", "cython-linreg predict_clr_cython(x[0], clr.coef_, clr.interc... \n", "python-linreg predict_clr_python_loop(z, coef, intercept) \n", "cffi-linreg-custom-float wrapped predict_clr_custom(x32) \n", "numba-linreg-type-numpy predict_clr_numba_numpy(x[0], clr.coef_, clr.i... \n", "cffi-linreg-custom wrapped predict_clr_custom(x) \n", "cffi-linreg-wrapped predict_clr(x, clr) \n", "numpy-linreg-numpy predict_clr_numpy(z, coef, clr.intercept_) \n", "onnxruntime-float64 predict_onnxrt(x.astype(numpy.float32)) \n", "onnxruntime-float32 predict_onnxrt(x32) \n", "numba-linreg-notype predict_clr_numba(z, clr.coef_, clr.intercept_) \n", "sklearn.predict clr.predict(z) "]}, "execution_count": 73, "metadata": {}, "output_type": "execute_result"}], "source": ["cols = [\"average\", \"deviation\", \"min5\", \"max5\", \"run\", \"code\"]\n", "df[cols]"]}, {"cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [{"name": "stderr", "output_type": "stream", "text": [":7: MatplotlibDeprecationWarning: The 'b' parameter of grid() has been renamed 'visible' since Matplotlib 3.5; support for the old name will be dropped two minor releases later.\n", " ax.grid(b=True, which=\"major\")\n", ":8: MatplotlibDeprecationWarning: The 'b' parameter of grid() has been renamed 'visible' since Matplotlib 3.5; support for the old name will be dropped two minor releases later.\n", " ax.grid(b=True, which=\"minor\");\n"]}, {"data": {"image/png": "\n", "text/plain": ["
"]}, "metadata": {"needs_background": "light"}, "output_type": "display_data"}], "source": ["%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "fig, ax = plt.subplots(1, 1, figsize=(14,6))\n", "df[[\"average\", \"deviation\"]].plot(kind=\"barh\", logx=True, ax=ax, xerr=\"deviation\",\n", " legend=False, fontsize=12, width=0.8)\n", "ax.set_ylabel(\"\")\n", "ax.grid(b=True, which=\"major\")\n", "ax.grid(b=True, which=\"minor\");"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Il manque \u00e0 ce comparatif le [GPU](https://en.wikipedia.org/wiki/Graphics_processing_unit) mais c'est un peu plus complexe \u00e0 mettre en oeuvre, il faut une carte [GPU](https://fr.wikipedia.org/wiki/Processeur_graphique) et la parall\u00e9lisation n'apporterait pas \u00e9norm\u00e9ment compte tenu de la faible dimension du probl\u00e8me."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Pr\u00e9diction one-off et biais de mesure\n", "\n", "Le graphique pr\u00e9c\u00e9dent montre que la fonction ``predict`` de *scikit-learn* est la plus lente. La premi\u00e8re raison est que ce code est valable pour toutes les r\u00e9gresssions lin\u00e9aires alors que toutes les autres fonctions sont sp\u00e9cialis\u00e9es pour un seul mod\u00e8le. La seconde raison est que le code de *scikit-learn* est optimis\u00e9 pour le calcul de plusieurs pr\u00e9dictions \u00e0 la fois alors que toutes les autres fonctions n'en calcule qu'une seule (sc\u00e9nario dit *one-off*). On compare \u00e0 ce que donnerait unev version purement python et numpy."]}, {"cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([[214.72477745],\n", " [175.29091463]])"]}, "execution_count": 75, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_python_loop_multi(x, coef, intercept): \n", " # On s'attend \u00e0 deux dimension.\n", " res = numpy.zeros((x.shape[0], 1))\n", " res[:, 0] = intercept\n", " for i in range(0, x.shape[0]):\n", " res[i, 0] += sum(a*b for a, b in zip(x[i, :], coef))\n", " return res\n", "\n", "predict_clr_python_loop_multi(diabetes_X_test[:2], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([[214.72477745],\n", " [175.29091463]])"]}, "execution_count": 76, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_numpy_loop_multi(x, coef, intercept): \n", " # On s'attend \u00e0 deux dimension.\n", " res = numpy.ones((x.shape[0], 1)) * intercept\n", " res += x @ coef.reshape((len(coef), 1))\n", " return res\n", "\n", "predict_clr_numpy_loop_multi(diabetes_X_test[:2], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [{"data": {"text/plain": ["[214.724777447606, 175.29091463098356]"]}, "execution_count": 77, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_numba_cast_multi(X, coef, intercept):\n", " return [predict_clr_numba_cast(x, coef, intercept) for x in X]\n", "\n", "predict_clr_numba_cast_multi(diabetes_X_test[:2], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [{"data": {"text/plain": ["[214.724777447606, 175.29091463098356]"]}, "execution_count": 78, "metadata": {}, "output_type": "execute_result"}], "source": ["def predict_clr_cython_type_multi(X, coef, intercept):\n", " return [predict_clr_cython_type(x, coef, intercept) for x in X]\n", "\n", "predict_clr_cython_type_multi(diabetes_X_test[:2], clr.coef_, clr.intercept_)"]}, {"cell_type": "code", "execution_count": 78, "metadata": {"scrolled": false}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["batch = 1\n", "Moyenne: 62.45 \u00b5s Ecart-type 33.39 \u00b5s (with 10 runs) in [41.71 \u00b5s, 119.15 \u00b5s]\n", "Moyenne: 6.43 \u00b5s Ecart-type 496.70 ns (with 10 runs) in [6.26 \u00b5s, 8.59 \u00b5s]\n", "Moyenne: 12.10 \u00b5s Ecart-type 8.26 \u00b5s (with 10 runs) in [8.20 \u00b5s, 26.70 \u00b5s]\n", "Moyenne: 1.99 \u00b5s Ecart-type 259.55 ns (with 10 runs) in [1.95 \u00b5s, 1.99 \u00b5s]\n", "Moyenne: 1.61 \u00b5s Ecart-type 103.91 ns (with 10 runs) in [1.59 \u00b5s, 1.63 \u00b5s]\n", "Moyenne: 19.38 \u00b5s Ecart-type 11.88 \u00b5s (with 10 runs) in [13.14 \u00b5s, 43.26 \u00b5s]\n", "batch = 10\n", "Moyenne: 73.96 \u00b5s Ecart-type 41.93 \u00b5s (with 10 runs) in [43.50 \u00b5s, 116.26 \u00b5s]\n", "Moyenne: 101.06 \u00b5s Ecart-type 3.73 \u00b5s (with 10 runs) in [98.16 \u00b5s, 111.41 \u00b5s]\n", "Moyenne: 19.06 \u00b5s Ecart-type 31.16 \u00b5s (with 10 runs) in [11.86 \u00b5s, 25.57 \u00b5s]\n", "Moyenne: 10.84 \u00b5s Ecart-type 5.26 \u00b5s (with 10 runs) in [8.34 \u00b5s, 22.70 \u00b5s]\n", "Moyenne: 10.46 \u00b5s Ecart-type 5.49 \u00b5s (with 10 runs) in [5.69 \u00b5s, 20.84 \u00b5s]\n", "Moyenne: 19.66 \u00b5s Ecart-type 25.05 \u00b5s (with 10 runs) in [12.23 \u00b5s, 34.34 \u00b5s]\n", "batch = 100\n", "Moyenne: 68.65 \u00b5s Ecart-type 26.04 \u00b5s (with 10 runs) in [46.99 \u00b5s, 119.00 \u00b5s]\n", "Moyenne: 740.30 \u00b5s Ecart-type 156.38 \u00b5s (with 10 runs) in [512.14 \u00b5s, 1.02 ms]\n", "Moyenne: 10.75 \u00b5s Ecart-type 3.89 \u00b5s (with 10 runs) in [8.72 \u00b5s, 16.79 \u00b5s]\n", "Moyenne: 94.32 \u00b5s Ecart-type 14.42 \u00b5s (with 10 runs) in [72.11 \u00b5s, 124.00 \u00b5s]\n", "Moyenne: 67.23 \u00b5s Ecart-type 31.05 \u00b5s (with 10 runs) in [43.72 \u00b5s, 135.94 \u00b5s]\n", "Moyenne: 91.28 \u00b5s Ecart-type 164.49 \u00b5s (with 10 runs) in [15.53 \u00b5s, 481.48 \u00b5s]\n", "batch = 200\n", "Moyenne: 68.82 \u00b5s Ecart-type 38.95 \u00b5s (with 10 runs) in [46.58 \u00b5s, 152.87 \u00b5s]\n", "Moyenne: 1.59 ms Ecart-type 497.08 \u00b5s (with 10 runs) in [1.09 ms, 2.98 ms]\n", "Moyenne: 11.66 \u00b5s Ecart-type 2.01 \u00b5s (with 10 runs) in [9.79 \u00b5s, 16.71 \u00b5s]\n", "Moyenne: 167.67 \u00b5s Ecart-type 37.37 \u00b5s (with 10 runs) in [133.64 \u00b5s, 240.53 \u00b5s]\n", "Moyenne: 102.09 \u00b5s Ecart-type 25.18 \u00b5s (with 10 runs) in [86.07 \u00b5s, 162.09 \u00b5s]\n", "Moyenne: 18.04 \u00b5s Ecart-type 8.31 \u00b5s (with 10 runs) in [15.00 \u00b5s, 34.57 \u00b5s]\n", "batch = 500\n", "Moyenne: 63.53 \u00b5s Ecart-type 20.92 \u00b5s (with 10 runs) in [50.94 \u00b5s, 116.69 \u00b5s]\n", "Moyenne: 3.22 ms Ecart-type 296.30 \u00b5s (with 10 runs) in [2.84 ms, 3.80 ms]\n", "Moyenne: 13.91 \u00b5s Ecart-type 4.58 \u00b5s (with 10 runs) in [11.80 \u00b5s, 26.71 \u00b5s]\n", "Moyenne: 410.88 \u00b5s Ecart-type 73.68 \u00b5s (with 10 runs) in [333.06 \u00b5s, 523.19 \u00b5s]\n", "Moyenne: 263.08 \u00b5s Ecart-type 117.22 \u00b5s (with 10 runs) in [211.75 \u00b5s, 444.83 \u00b5s]\n", "Moyenne: 22.28 \u00b5s Ecart-type 12.93 \u00b5s (with 10 runs) in [19.16 \u00b5s, 37.56 \u00b5s]\n", "batch = 1000\n", "Moyenne: 153.47 \u00b5s Ecart-type 43.85 \u00b5s (with 10 runs) in [125.94 \u00b5s, 229.51 \u00b5s]\n", "Moyenne: 5.52 ms Ecart-type 389.98 \u00b5s (with 10 runs) in [4.99 ms, 6.18 ms]\n", "Moyenne: 83.03 \u00b5s Ecart-type 22.52 \u00b5s (with 10 runs) in [73.67 \u00b5s, 95.91 \u00b5s]\n", "Moyenne: 702.77 \u00b5s Ecart-type 76.26 \u00b5s (with 10 runs) in [661.35 \u00b5s, 888.11 \u00b5s]\n", "Moyenne: 445.87 \u00b5s Ecart-type 53.38 \u00b5s (with 10 runs) in [420.78 \u00b5s, 548.59 \u00b5s]\n", "Moyenne: 27.48 \u00b5s Ecart-type 6.31 \u00b5s (with 10 runs) in [26.53 \u00b5s, 29.87 \u00b5s]\n", "batch = 2000\n", "Moyenne: 147.73 \u00b5s Ecart-type 19.47 \u00b5s (with 10 runs) in [132.10 \u00b5s, 187.47 \u00b5s]\n", "Moyenne: 83.71 \u00b5s Ecart-type 4.68 \u00b5s (with 10 runs) in [79.25 \u00b5s, 93.14 \u00b5s]\n", "Moyenne: 1.58 ms Ecart-type 216.25 \u00b5s (with 10 runs) in [1.32 ms, 1.97 ms]\n", "Moyenne: 47.31 \u00b5s Ecart-type 20.65 \u00b5s (with 10 runs) in [37.00 \u00b5s, 97.03 \u00b5s]\n", "batch = 3000\n", "Moyenne: 179.79 \u00b5s Ecart-type 45.35 \u00b5s (with 10 runs) in [144.02 \u00b5s, 310.74 \u00b5s]\n", "Moyenne: 92.27 \u00b5s Ecart-type 7.05 \u00b5s (with 10 runs) in [84.71 \u00b5s, 106.64 \u00b5s]\n", "Moyenne: 2.37 ms Ecart-type 267.39 \u00b5s (with 10 runs) in [1.99 ms, 2.91 ms]\n", "Moyenne: 50.69 \u00b5s Ecart-type 6.32 \u00b5s (with 10 runs) in [48.65 \u00b5s, 52.22 \u00b5s]\n", "batch = 4000\n", "Moyenne: 193.02 \u00b5s Ecart-type 28.74 \u00b5s (with 10 runs) in [173.71 \u00b5s, 211.52 \u00b5s]\n", "Moyenne: 100.06 \u00b5s Ecart-type 22.27 \u00b5s (with 10 runs) in [85.61 \u00b5s, 133.38 \u00b5s]\n", "Moyenne: 3.13 ms Ecart-type 296.38 \u00b5s (with 10 runs) in [2.73 ms, 3.54 ms]\n", "Moyenne: 64.67 \u00b5s Ecart-type 7.43 \u00b5s (with 10 runs) in [59.90 \u00b5s, 68.08 \u00b5s]\n", "batch = 5000\n", "Moyenne: 215.06 \u00b5s Ecart-type 46.52 \u00b5s (with 10 runs) in [196.06 \u00b5s, 411.19 \u00b5s]\n", "Moyenne: 110.91 \u00b5s Ecart-type 8.08 \u00b5s (with 10 runs) in [90.36 \u00b5s, 122.94 \u00b5s]\n", "Moyenne: 3.49 ms Ecart-type 212.13 \u00b5s (with 10 runs) in [3.30 ms, 4.04 ms]\n", "Moyenne: 78.86 \u00b5s Ecart-type 5.47 \u00b5s (with 10 runs) in [77.15 \u00b5s, 102.21 \u00b5s]\n", "batch = 10000\n", "Moyenne: 248.75 \u00b5s Ecart-type 64.14 \u00b5s (with 10 runs) in [192.57 \u00b5s, 425.01 \u00b5s]\n", "Moyenne: 116.55 \u00b5s Ecart-type 17.05 \u00b5s (with 10 runs) in [100.13 \u00b5s, 152.60 \u00b5s]\n", "Moyenne: 7.18 ms Ecart-type 420.77 \u00b5s (with 10 runs) in [6.62 ms, 8.15 ms]\n", "Moyenne: 153.30 \u00b5s Ecart-type 13.69 \u00b5s (with 10 runs) in [149.03 \u00b5s, 211.69 \u00b5s]\n", "batch = 20000\n", "Moyenne: 293.81 \u00b5s Ecart-type 19.49 \u00b5s (with 10 runs) in [283.46 \u00b5s, 364.31 \u00b5s]\n", "Moyenne: 147.12 \u00b5s Ecart-type 8.23 \u00b5s (with 10 runs) in [135.43 \u00b5s, 160.67 \u00b5s]\n", "Moyenne: 215.69 \u00b5s Ecart-type 14.46 \u00b5s (with 10 runs) in [204.68 \u00b5s, 262.99 \u00b5s]\n", "batch = 50000\n", "Moyenne: 1.00 ms Ecart-type 44.28 \u00b5s (with 10 runs) in [967.01 \u00b5s, 1.13 ms]\n", "Moyenne: 503.33 \u00b5s Ecart-type 13.21 \u00b5s (with 10 runs) in [491.66 \u00b5s, 551.81 \u00b5s]\n", "Moyenne: 1.86 ms Ecart-type 1.14 ms (with 10 runs) in [1.13 ms, 4.90 ms]\n", "batch = 75000\n", "Moyenne: 1.75 ms Ecart-type 153.53 \u00b5s (with 10 runs) in [1.56 ms, 1.94 ms]\n", "Moyenne: 663.38 \u00b5s Ecart-type 20.47 \u00b5s (with 10 runs) in [630.15 \u00b5s, 700.62 \u00b5s]\n", "Moyenne: 1.88 ms Ecart-type 173.99 \u00b5s (with 10 runs) in [1.65 ms, 2.14 ms]\n", "batch = 100000\n", "Moyenne: 2.56 ms Ecart-type 204.42 \u00b5s (with 10 runs) in [2.27 ms, 2.85 ms]\n", "Moyenne: 1.21 ms Ecart-type 113.75 \u00b5s (with 10 runs) in [1.04 ms, 1.44 ms]\n", "Moyenne: 2.98 ms Ecart-type 934.23 \u00b5s (with 10 runs) in [2.22 ms, 6.31 ms]\n", "batch = 150000\n", "Moyenne: 4.00 ms Ecart-type 188.08 \u00b5s (with 10 runs) in [3.78 ms, 4.46 ms]\n", "Moyenne: 2.92 ms Ecart-type 344.26 \u00b5s (with 10 runs) in [2.54 ms, 3.93 ms]\n", "Moyenne: 3.76 ms Ecart-type 308.56 \u00b5s (with 10 runs) in [3.26 ms, 4.52 ms]\n", "batch = 200000\n", "Moyenne: 5.73 ms Ecart-type 424.36 \u00b5s (with 10 runs) in [5.17 ms, 6.72 ms]\n", "Moyenne: 4.00 ms Ecart-type 606.67 \u00b5s (with 10 runs) in [3.50 ms, 6.04 ms]\n", "Moyenne: 5.44 ms Ecart-type 742.52 \u00b5s (with 10 runs) in [4.57 ms, 7.38 ms]\n", "batch = 300000\n", "Moyenne: 8.36 ms Ecart-type 1.26 ms (with 10 runs) in [7.78 ms, 13.52 ms]\n", "Moyenne: 5.37 ms Ecart-type 352.34 \u00b5s (with 10 runs) in [5.08 ms, 6.64 ms]\n", "Moyenne: 7.18 ms Ecart-type 680.24 \u00b5s (with 10 runs) in [6.69 ms, 8.83 ms]\n", "batch = 400000\n", "Moyenne: 11.49 ms Ecart-type 1.16 ms (with 10 runs) in [10.36 ms, 15.15 ms]\n", "Moyenne: 7.87 ms Ecart-type 709.04 \u00b5s (with 10 runs) in [7.18 ms, 9.70 ms]\n", "Moyenne: 10.51 ms Ecart-type 900.27 \u00b5s (with 10 runs) in [9.41 ms, 13.22 ms]\n", "batch = 500000\n", "Moyenne: 15.01 ms Ecart-type 1.90 ms (with 10 runs) in [12.99 ms, 20.81 ms]\n", "Moyenne: 11.02 ms Ecart-type 889.69 \u00b5s (with 10 runs) in [9.64 ms, 13.29 ms]\n", "Moyenne: 17.02 ms Ecart-type 2.13 ms (with 10 runs) in [14.72 ms, 22.19 ms]\n", "batch = 600000\n", "Moyenne: 21.19 ms Ecart-type 1.93 ms (with 10 runs) in [18.32 ms, 26.29 ms]\n", "Moyenne: 12.47 ms Ecart-type 964.03 \u00b5s (with 10 runs) in [11.00 ms, 14.31 ms]\n", "Moyenne: 18.04 ms Ecart-type 2.80 ms (with 10 runs) in [13.37 ms, 24.63 ms]\n"]}], "source": ["memo = []\n", "batch = [1, 10, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000,\n", " 20000, 50000, 75000, 100000, 150000, 200000, 300000, 400000,\n", " 500000, 600000]\n", "number = 10\n", "for i in batch:\n", " if i <= diabetes_X_test.shape[0]:\n", " mx = diabetes_X_test[:i]\n", " else:\n", " mxs = [diabetes_X_test] * (i // diabetes_X_test.shape[0] + 1)\n", " mx = numpy.vstack(mxs)\n", " mx = mx[:i]\n", "\n", " print(\"batch\", \"=\", i)\n", " repeat=20 if i >= 5000 else 100\n", " \n", " memo.append(timeexe(\"sklearn.predict %d\" % i, \"clr.predict(mx)\", \n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"sklearn\"\n", " \n", " if i <= 1000:\n", " # tr\u00e8s lent\n", " memo.append(timeexe(\"python %d\" % i, \"predict_clr_python_loop_multi(mx, clr.coef_, clr.intercept_)\",\n", " repeat=20, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"python\"\n", " \n", " memo.append(timeexe(\"numpy %d\" % i, \"predict_clr_numpy_loop_multi(mx, clr.coef_, clr.intercept_)\",\n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"numpy\"\n", " \n", " if i <= 10000:\n", " # tr\u00e8s lent\n", " memo.append(timeexe(\"numba %d\" % i, \"predict_clr_numba_cast_multi(mx, clr.coef_, clr.intercept_)\",\n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"numba\"\n", " \n", " if i <= 1000:\n", " # tr\u00e8s lent\n", " memo.append(timeexe(\"cython %d\" % i, \"predict_clr_cython_type_multi(mx, clr.coef_, clr.intercept_)\",\n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"cython\"\n", " \n", " if ok_onnx:\n", " memo.append(timeexe(\"onnxruntime %d\" % i, \"predict_onnxrt(mx.astype(numpy.float32))\",\n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"onnxruntime\""]}, {"cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
libcythonnumbanumpyonnxruntimepythonsklearn
batch
10.0000020.0000020.0000120.0000190.0000060.000062
100.0000100.0000110.0000190.0000200.0001010.000074
1000.0000670.0000940.0000110.0000910.0007400.000069
2000.0001020.0001680.0000120.0000180.0015900.000069
5000.0002630.0004110.0000140.0000220.0032250.000064
10000.0004460.0007030.0000830.0000270.0055160.000153
2000NaN0.0015800.0000840.000047NaN0.000148
3000NaN0.0023710.0000920.000051NaN0.000180
4000NaN0.0031250.0001000.000065NaN0.000193
5000NaN0.0034900.0001110.000079NaN0.000215
10000NaN0.0071810.0001170.000153NaN0.000249
20000NaNNaN0.0001470.000216NaN0.000294
50000NaNNaN0.0005030.001863NaN0.001000
75000NaNNaN0.0006630.001879NaN0.001749
100000NaNNaN0.0012090.002980NaN0.002557
150000NaNNaN0.0029230.003762NaN0.004001
200000NaNNaN0.0040010.005440NaN0.005731
300000NaNNaN0.0053660.007180NaN0.008365
400000NaNNaN0.0078720.010510NaN0.011489
500000NaNNaN0.0110160.017021NaN0.015013
600000NaNNaN0.0124680.018040NaN0.021193
\n", "
"], "text/plain": ["lib cython numba numpy onnxruntime python sklearn\n", "batch \n", "1 0.000002 0.000002 0.000012 0.000019 0.000006 0.000062\n", "10 0.000010 0.000011 0.000019 0.000020 0.000101 0.000074\n", "100 0.000067 0.000094 0.000011 0.000091 0.000740 0.000069\n", "200 0.000102 0.000168 0.000012 0.000018 0.001590 0.000069\n", "500 0.000263 0.000411 0.000014 0.000022 0.003225 0.000064\n", "1000 0.000446 0.000703 0.000083 0.000027 0.005516 0.000153\n", "2000 NaN 0.001580 0.000084 0.000047 NaN 0.000148\n", "3000 NaN 0.002371 0.000092 0.000051 NaN 0.000180\n", "4000 NaN 0.003125 0.000100 0.000065 NaN 0.000193\n", "5000 NaN 0.003490 0.000111 0.000079 NaN 0.000215\n", "10000 NaN 0.007181 0.000117 0.000153 NaN 0.000249\n", "20000 NaN NaN 0.000147 0.000216 NaN 0.000294\n", "50000 NaN NaN 0.000503 0.001863 NaN 0.001000\n", "75000 NaN NaN 0.000663 0.001879 NaN 0.001749\n", "100000 NaN NaN 0.001209 0.002980 NaN 0.002557\n", "150000 NaN NaN 0.002923 0.003762 NaN 0.004001\n", "200000 NaN NaN 0.004001 0.005440 NaN 0.005731\n", "300000 NaN NaN 0.005366 0.007180 NaN 0.008365\n", "400000 NaN NaN 0.007872 0.010510 NaN 0.011489\n", "500000 NaN NaN 0.011016 0.017021 NaN 0.015013\n", "600000 NaN NaN 0.012468 0.018040 NaN 0.021193"]}, "execution_count": 80, "metadata": {}, "output_type": "execute_result"}], "source": ["dfb = pandas.DataFrame(memo)[[\"average\", \"lib\", \"batch\"]]\n", "piv = dfb.pivot(\"batch\", \"lib\", \"average\")\n", "piv"]}, {"cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
libcythonnumbanumpyonnxruntimepythonsklearnave_cythonave_numbaave_numpyave_onnxruntimeave_pythonave_sklearn
batch
10.0000020.0000020.0000120.0000190.0000060.0000621.614900e-061.990900e-061.209790e-051.938460e-050.0000066.245340e-05
100.0000100.0000110.0000190.0000200.0001010.0000741.046460e-061.084320e-061.906010e-061.965810e-060.0000107.396440e-06
1000.0000670.0000940.0000110.0000910.0007400.0000696.722760e-079.431990e-071.075410e-079.127790e-070.0000076.865190e-07
2000.0001020.0001680.0000120.0000180.0015900.0000695.104525e-078.383455e-075.827850e-089.019900e-080.0000083.440995e-07
5000.0002630.0004110.0000140.0000220.0032250.0000645.261610e-078.217592e-072.781740e-084.455220e-080.0000061.270610e-07
10000.0004460.0007030.0000830.0000270.0055160.0001534.458687e-077.027674e-078.303090e-082.747640e-080.0000061.534708e-07
2000NaN0.0015800.0000840.000047NaN0.000148NaN7.899395e-074.185515e-082.365645e-08NaN7.386540e-08
3000NaN0.0023710.0000920.000051NaN0.000180NaN7.902492e-073.075760e-081.689707e-08NaN5.992867e-08
4000NaN0.0031250.0001000.000065NaN0.000193NaN7.813673e-072.501480e-081.616818e-08NaN4.825388e-08
5000NaN0.0034900.0001110.000079NaN0.000215NaN6.979748e-072.218220e-081.577170e-08NaN4.301210e-08
10000NaN0.0071810.0001170.000153NaN0.000249NaN7.180820e-071.165535e-081.533050e-08NaN2.487490e-08
20000NaNNaN0.0001470.000216NaN0.000294NaNNaN7.356025e-091.078465e-08NaN1.469057e-08
50000NaNNaN0.0005030.001863NaN0.001000NaNNaN1.006655e-083.725768e-08NaN2.000188e-08
75000NaNNaN0.0006630.001879NaN0.001749NaNNaN8.845087e-092.505991e-08NaN2.331396e-08
100000NaNNaN0.0012090.002980NaN0.002557NaNNaN1.208690e-082.980086e-08NaN2.556766e-08
150000NaNNaN0.0029230.003762NaN0.004001NaNNaN1.948814e-082.508106e-08NaN2.667062e-08
200000NaNNaN0.0040010.005440NaN0.005731NaNNaN2.000416e-082.720136e-08NaN2.865267e-08
300000NaNNaN0.0053660.007180NaN0.008365NaNNaN1.788538e-082.393301e-08NaN2.788189e-08
400000NaNNaN0.0078720.010510NaN0.011489NaNNaN1.967972e-082.627497e-08NaN2.872169e-08
500000NaNNaN0.0110160.017021NaN0.015013NaNNaN2.203297e-083.404131e-08NaN3.002589e-08
600000NaNNaN0.0124680.018040NaN0.021193NaNNaN2.077927e-083.006664e-08NaN3.532122e-08
\n", "
"], "text/plain": ["lib cython numba numpy onnxruntime python sklearn \\\n", "batch \n", "1 0.000002 0.000002 0.000012 0.000019 0.000006 0.000062 \n", "10 0.000010 0.000011 0.000019 0.000020 0.000101 0.000074 \n", "100 0.000067 0.000094 0.000011 0.000091 0.000740 0.000069 \n", "200 0.000102 0.000168 0.000012 0.000018 0.001590 0.000069 \n", "500 0.000263 0.000411 0.000014 0.000022 0.003225 0.000064 \n", "1000 0.000446 0.000703 0.000083 0.000027 0.005516 0.000153 \n", "2000 NaN 0.001580 0.000084 0.000047 NaN 0.000148 \n", "3000 NaN 0.002371 0.000092 0.000051 NaN 0.000180 \n", "4000 NaN 0.003125 0.000100 0.000065 NaN 0.000193 \n", "5000 NaN 0.003490 0.000111 0.000079 NaN 0.000215 \n", "10000 NaN 0.007181 0.000117 0.000153 NaN 0.000249 \n", "20000 NaN NaN 0.000147 0.000216 NaN 0.000294 \n", "50000 NaN NaN 0.000503 0.001863 NaN 0.001000 \n", "75000 NaN NaN 0.000663 0.001879 NaN 0.001749 \n", "100000 NaN NaN 0.001209 0.002980 NaN 0.002557 \n", "150000 NaN NaN 0.002923 0.003762 NaN 0.004001 \n", "200000 NaN NaN 0.004001 0.005440 NaN 0.005731 \n", "300000 NaN NaN 0.005366 0.007180 NaN 0.008365 \n", "400000 NaN NaN 0.007872 0.010510 NaN 0.011489 \n", "500000 NaN NaN 0.011016 0.017021 NaN 0.015013 \n", "600000 NaN NaN 0.012468 0.018040 NaN 0.021193 \n", "\n", "lib ave_cython ave_numba ave_numpy ave_onnxruntime ave_python \\\n", "batch \n", "1 1.614900e-06 1.990900e-06 1.209790e-05 1.938460e-05 0.000006 \n", "10 1.046460e-06 1.084320e-06 1.906010e-06 1.965810e-06 0.000010 \n", "100 6.722760e-07 9.431990e-07 1.075410e-07 9.127790e-07 0.000007 \n", "200 5.104525e-07 8.383455e-07 5.827850e-08 9.019900e-08 0.000008 \n", "500 5.261610e-07 8.217592e-07 2.781740e-08 4.455220e-08 0.000006 \n", "1000 4.458687e-07 7.027674e-07 8.303090e-08 2.747640e-08 0.000006 \n", "2000 NaN 7.899395e-07 4.185515e-08 2.365645e-08 NaN \n", "3000 NaN 7.902492e-07 3.075760e-08 1.689707e-08 NaN \n", "4000 NaN 7.813673e-07 2.501480e-08 1.616818e-08 NaN \n", "5000 NaN 6.979748e-07 2.218220e-08 1.577170e-08 NaN \n", "10000 NaN 7.180820e-07 1.165535e-08 1.533050e-08 NaN \n", "20000 NaN NaN 7.356025e-09 1.078465e-08 NaN \n", "50000 NaN NaN 1.006655e-08 3.725768e-08 NaN \n", "75000 NaN NaN 8.845087e-09 2.505991e-08 NaN \n", "100000 NaN NaN 1.208690e-08 2.980086e-08 NaN \n", "150000 NaN NaN 1.948814e-08 2.508106e-08 NaN \n", "200000 NaN NaN 2.000416e-08 2.720136e-08 NaN \n", "300000 NaN NaN 1.788538e-08 2.393301e-08 NaN \n", "400000 NaN NaN 1.967972e-08 2.627497e-08 NaN \n", "500000 NaN NaN 2.203297e-08 3.404131e-08 NaN \n", "600000 NaN NaN 2.077927e-08 3.006664e-08 NaN \n", "\n", "lib ave_sklearn \n", "batch \n", "1 6.245340e-05 \n", "10 7.396440e-06 \n", "100 6.865190e-07 \n", "200 3.440995e-07 \n", "500 1.270610e-07 \n", "1000 1.534708e-07 \n", "2000 7.386540e-08 \n", "3000 5.992867e-08 \n", "4000 4.825388e-08 \n", "5000 4.301210e-08 \n", "10000 2.487490e-08 \n", "20000 1.469057e-08 \n", "50000 2.000188e-08 \n", "75000 2.331396e-08 \n", "100000 2.556766e-08 \n", "150000 2.667062e-08 \n", "200000 2.865267e-08 \n", "300000 2.788189e-08 \n", "400000 2.872169e-08 \n", "500000 3.002589e-08 \n", "600000 3.532122e-08 "]}, "execution_count": 81, "metadata": {}, "output_type": "execute_result"}], "source": ["for c in piv.columns:\n", " piv[\"ave_\" + c] = piv[c] / piv.index\n", "piv"]}, {"cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [{"data": {"image/png": "\n", "text/plain": ["
"]}, "metadata": {"needs_background": "light"}, "output_type": "display_data"}], "source": ["libs = list(c for c in piv.columns if \"ave_\" in c)\n", "ax = piv.plot(y=libs, logy=True, logx=True, figsize=(10, 5))\n", "ax.set_title(\"Evolution du temps de pr\u00e9diction selon la taille du batch\")\n", "ax.grid(True);"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Le minimum obtenu est pour $10^{-8} s$ soit 10 ns. Cela montre que la comparaisson pr\u00e9c\u00e9dente \u00e9tait incompl\u00e8te voire biais\u00e9e. Tout d\u00e9pend de l'usage qu'on fait de la fonction de pr\u00e9diction m\u00eame s'il sera toujours possible de d'\u00e9crire un code sp\u00e9cialis\u00e9 plus rapide que toute autre fonction g\u00e9n\u00e9rique. En g\u00e9n\u00e9ral, plus on reste du c\u00f4t\u00e9 Python, plus le programme est lent. Le nombre de passage de l'un \u00e0 l'autre, selon la fa\u00e7on dont il est fait ralenti aussi. En tenant compte de cela, le programme rouge sera plus lent que le vert."]}, {"cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [{"data": {"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwMAAAE9CAIAAADYrymAAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAAFiUAABYlAUlSJPAAACViSURBVHhe7d1Ldty6loThM5Bq1arhVFMD0mjcv5PwRNSuMZxiJkMSGMSLmXwAxP+taNzr3CBBEKS2Zfn4n38BAABGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScEAADGRScE3MDXn49/vn3+1S8CAMrohIAboBMCgBfRCQE3QCcEAC+iEwIAAOOiEwIAAOOiEwIAAOOiEwJa8vdTP+0zSf3AT+SHgvI/J/T19ffP58dvxdPHx8fnn79fKolZj5pHBCcLz1X8WaWKH2aaT7mcaXqer50x/MXouPAepCYK4D7O64SWr5+k6S1Yej9vNr9dP//YQSvey8DZyq1QbOOmN/PX38/8k/fhD8ZTZlTYqYTnKj5Q+YLSRNdP8MtnDH95NZI+CBhNc53Qj4/PXdqhr6kJ0gHphNCDUisU3bepzVz32Plp6h/W1LliE88WLPqPJHuIXz/j8gqDj+iDgPG02wk9vP8iCt5rdELoQ35jxj9NjFl8Xf/48/dLz8Djj8vC53HxcNij+vN7kuf3VvWL38L5FR+odMFyno9vCn/PZ/6DPX0ySc504xntKufDLn8tekQA93NNJ7RqSmZf01vPXrXvvozohNCf3M5M7Oj4kPwW//708SfSwaEWXclqVHjISfh5/myTVMHy12PvhunFoM8XI18+42zZf/1ZNIeJdxSAG2qqE5qFhYn3Wz06IXQovTVTGzo+YvEsVf5xc9geRB8LO6h+dVJ8oBIFuYf0RzA2fn1bzvhj0QsF6IOAkTTYCS1fT2++knIv2eJrFLhIam8m93PFgNnH55/gz57Wap6K4KBhSXFovCD81Sq/V/7iGUOrBZq8+dIB0JkmO6HFq/H5/gpfV/EX2mTxVWL6DbD+98r3AewtOf8NM/3/p8dfY8t90ZisBz2GhT/nsPA7xccSxM+481+cQ5fiX8ITLcgk/SU//Tey5qZIZd8WT1FiJwYnC8+VnoMUL6rSz9gXz7jk56cPAkbTSSdUM9pe4en36/cbcXGanPgJf/9WWkJk2O+kHj+bof8ZUVggDCDYwNqxuS/ruc+iP+ocWPw1TXuMooKThefKzuEhXpB+UlN+xr54RmMTSNYBuKlu/nSsNHz1efr9+v2mC4cUrM5Y+m+0fLO3av1Lv7BEuL9gszy3UfarevbDb8+/h7X6HubTz34Lzprcg8HJwnMV5xAvqDljwotnXIg8k4lKADfVYCe0eDP9VubHp954ubdsOGb2+0dTq99FL16O9vIM/gxt/bffFqddvXWDM/r3mHgdjy7YLdNmyH9Rz38akfrb6TUHWk7sR3FouP1/CzZP/deLZ/wVHiC0cR4AutZSJ+Tv5klYmDtA8o0YvAlXJ7W34Oqv1iw+D0Yvf738fg0qwhdz5IyLz+OLhIEst8uP2M6IPgHPB0rfAop+bY+NKjUP6YroHAKLxyIoqNr130XPH6X7rnn1jLL4+HFk/a+H2OEA3NM1nVAlfysm39HBsW1MMGT1ji28JifR0eGw5Hs7ceziOz83Xwwn3C8/ohsj3HD1+y06anHSfLduz0340eqBWjwSy88Xh4z93iL8zwkthr56xoflx49P178CYATNdkKLn+L8lnjvBb/sr/vMR4sZxd96sYrw1zLNSrwsN52n8pwwknDHy6aNsxwf/DHu6s9xg0HhsSa/o9bftF1u0uXAnz/5jXyzNztwMdHVnzcvrv/lMy5H/hxzebzlCAB31Von9Pzed+KvoE/CF/vPayrXXuQ+C2cUf+fFKsqjZtG63HSeao+OQYQb4imxK1IbZzU+znfjsoPKWE6n8mwPfh2VZ/SJvnjGxdmWx8x8BOCeGvyJ6azwNaU3W/BL/nZdfLg6aeqLx69YRe6IoejRi4PLc8JYwh0xSW2KzMaxI6xFt+LqR/9/hT9Qs5pP7m9VZgfmzjgLfkAotP2M4VskcvWFjwHcTW+d0OqNn/kK8LB/J1QeNQtfpz91dELYKtwS6T1R2DjzHxcFB3rQN2BVErMaNY2YBgTbODahyMme477CWSauZBoam2juG8WTTWdcNDrxaSxK6IWAu+uuE7LjhP9oYuyllms9Cl88JrGKyuuIl9EJ4QYKnRAAdKW/TmhxoED8mPt3QnUXspjk78HphHADdEIA7qTDTmjZZXxLHDLXWZS7jnhF8HVgEhu5nGFQQSeEG6ATAnAnPXZC3os8pI6YO2u560hUhL/84H/nd/Hp4qx0QrgBOiEAd9JnJ7RqhZLvY29aHr7PXu460hXrXizOjksnhBugEwJwJ512QtaJZF7HkVZoj05oUvq36GN/55dOCDdAJwTgTnrthLa0DN6yfKj+zU7oaf7bu8vDZ/5uMp0QAABNOa8T2lfQMezQVgEAgDF12gkVv7cCAABQ1mMn9BX+kBB/hAQAAF7WTycU/gTND74hBAAA3tB1J0QfBAAA3tLRn44t/snp+V+CBAAAeEenPzENAACwAzohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwLjohAAAwrvt0Qv/3X//zcnQIAAAwmMs6IetFwqhiCzvCC9GBAADAHl9Y34zmcbwWO6GropkBADA2+/rYTjS/XdEJLaLJAQAwJPuy2Fd0DRvRCXk0PwBA8+wFXhkNxpKtUqfRxWxBJxSJpggAaJi9ujdFh8A3W5/eo6uqQycUj2YJ4L7sqT8uOh92Ysv7WnQsPNni3Ca6vBI6oVw0VwC3Yw/7OdG5sYWt4Y7RCYZny3Kn6ApLzuuE/vs//xvGpntcdPon+6gyGgzgFuwBPz+aB+rY6u0YnWBstiY3iy6y5LJOaIrN+Ijo3AErqI/GA+iTPdGXR9NCmq3YEdGZhmRLUR+N348df8foBCVXdkJ7RSeoZitVH40H0A97iluLZokVW6jjovONwa79hehAx7PzvhYdq+QOndAcnaaOLVZ9NB5AD+z5bTOaKwK2RIdGpxyAXfjW6ChtsLmlouqS+3RCU3SmOrZeW6OjAGiVPbMtRzPGky3Oa9l0qLn43uySN0WHaIxNMhqVltyqE5qj81WwJdsaHQVAG+wJ7Su6BuxxH3WgJ/soGpXel13vpugQrbLZWlRUcsNOaI7OWmKrtjU6CoCL2CO5S3To7ew4L0QHGputSX00fslqUlH17dhlbo2O0jyb9k/0ccltO6EpOnEFW7tN0SHQP7uzL0QHwvFs5XeMTvAeO+am6BADswVJRdUVbGAqqr4Ru8Ct0VHS7Gtum9Fcs87rhDJs3vtG56hgm2BrdBQ0w27QOdG5cSRb872io+/Hjl8fjR+SLUUmGlDBBqai6huxC9wUHSLGvsg2Hk06q4lOyNhlvB8dt47thvpoPNpgd+fMaAY4hq32+5kPay+N9zMfdmKnq4/GD8YWIRqVbmFHiEald2FXVx+NT7B93n4076wWO6EUu7z6aHw12xavRcfCFexeXBJNBbuyRX4n09HsRXFE5mlP7Oz10fgx2LVHo9Lt7DjrqO4W7NKK0bAS295dRFPP6qkTmtlFVkaDq9kueTk6HE5kt+DCaELYiS3vO5mOZq+I4zJPfmJzqI/G351ddTQqfYkdKhqVds4uqhgNK7GN3Us0+6z+OqGJXWdNNHI72zEvRAfCwWzZ24nmh/fYqr4TezmcGZtJZbQEt2aXHI1K32AHjEalPbMrykdjSmwndxRdQFaXndDMrrYYDdvO9s3W6Cg4hq12m9Fc8Spbz62xV8GFefla5nW4MbveaFT6BjtgNCrtmV1RJhpQwXZyR9EFZHXcCc3smjPRgJfY7nkhOhB2ZYvcbDRdbGTLuCn2+LeTly9tHnhLdqWpqPo9dsx1VNctu5xUVF3H9nBf0TVkdd8JTeyyM9GAl9g2eiE6EHZiy3tcdL6AFdREI7GFrWFN5oH24DeYd67ufuwyU1H12+ywFhX1ya4lFVXXsa1bE43sxx06oZndiVRU/QbbUpuiQ+BttrCbokO8x45ZjIahmi1gTeaB9sg3mxeucb7A+7HLjEale7AjW1TUIbuQTDSgju3bTDSgQ/fphCZ2V1JR9atsS70QHQivsvXcFB1iD3bkfDQG1WwBi5lH2cP+fubDZlj9psxHsAvJZx5yJ3aBqah6D3Zki4o6ZBeSiqqr2aZNRdV9GrETmqIBe7BNVhkNxna2kpuiQ+zKTpGKqlHHVq+YaYg94+9knsMmdoTKaPCTXVEmGnALdmmpqHondnCLijpkFxKNSqvZjk1F1d26VSc0sduTiqr3Y7utGA3DFraGW6OjHMBOlIqqkWWLVpNplD3g72SexgvsOJXR4Ce7rnw0pmd2Ramoeld2ijCq6JBdyDqqq2MbNRMN6Bmd0G5szxWjYahmC1gZDT6enTcalSLBlqsm0yh7uuszn/QIdqJ8NGbj5WtMt+xyUlH1AexEP9HHvbGriEaldWyXpqLqzt2tE5rYfYpGpbuyPVeMhqGOrV4xGnYum8M6qkOMrVVNplH2aNdkPt2h7IzFaNiWRdCAbtnlpKLqA9iJfqKPu2KXkIqqK9j+zEQDOjdoJzRF1XuznZeJBqCCLV0xGnY6m8Y6qkOMrVUmc7090TWZB57DTp2PxjzZxaai6j7ZtaSi6mPYuX6ij/th809F1RVsc2aiAf27YSc0sbsVjUoPYPsvEw1Aia1bPhpzEZvMOqrDii1UJlOxPc6VmU90JptAKqr+Ztebiqo7ZBeSiqqPYef6iT7uhE0+FVVXsJ2ZiQbcwj07oYnds2hUegDbhZloANJsxfLRmOvYfNZRHZZslTKZiu1Brsx8ovPZNKJR6Te75Ew0oDd2Famo+hh2rp/o4x7YzFNRdQXblvlozC3cthOa2G2LRqXHsO2YiQYgxtYqEw24lE0pGpUiYEuUyVRsT3FN5rNcxSazjuoCdtWZaEBX7BKiUelh7HQ/0cc9sJmnouoS25PFaNgt3LkTmtidW0d1h7EdmYkGIGBLlI/GXM1mFY1K8c3WJ5Op2B7hYuZTXMumtI7qluzaU1F1P2z+0aj0SHbGn+jj5tm0M9GALNuQxWjYXYzeCU1R6TFsR2aiAXiyxclHY5ph01tHdXjpAbHnNx+NaYBNzKKiGFuHaFTaCZv8Oqo7mJ30J/q4eTbtTDQgyzZkJhpwL3RCh99X25SpqBpbnvA5GtYSm+E6qhubrUkmGtBtGzSz6VlUtGJLEY1Ke2AzX0d1x7Pz/kQfN8+mnYqqs2wr5qMx90In9Iiqj2H7MhMNGJ4tSz4a0xibZCqqHpItRSYacIv3tU0yjCpWbDVSUXXbbM7RqPR4dt6f6OO22ZxTUXWW7cN8NOZ2bt4JTexGRqPSY9jWzEQDhmfLkokGtMfmmYqqh2RLkclcb89sJnN9m2yqYVSxYquRiqrbZnNeR3WnsFOHUUWrbLapqLrE9mEmGnBHdEKPqPQwtkEz0YCx2ZpkogFNsqlGo9Ih2VJkYk9rPjp6q2y2YVQRYwuSiqobZhNeR3WnsFNbVNQem2cqqq5g+zAVVd/U/Tuhid3RdVR3JNummWjAqGw1MtGAVtlsU1H1YGwRMpmK7WnNZD54y2zCYVSRYMuSiqpbZbNdR3WnsFNbVNQem2c0Kq1gmzAVVd/XEJ3QxO6rRUXHs/0ajUpHZauRiQY0zCaciqoHYBdeE3tU89FpGmYTDqOKNFuZVFTdHptnNCo9hZ3aoqL22DyjUWkF24TRqPTW6IQeUdEpbMuuo7oh2VKkourm2bRTUfXd2VXXxJ7TfHSattmcLSpKsMVJRdXtsXmuo7qz2NktKmqPzXMd1VWw7ReNSu9ulE5oYjc4jCpOYbt2HdWNx9YhGpV2wiafiqpvzS65mGmIPaTFzCdqnM3ZoqI0W6VUVN0Ym+Q6qjuLnd2iosbYJKNRaYntvVRUfXd0Qo+o4hS2a9dR3XhsHdZRXVfsElJR9U3ZxRYzDbEntJj5RF2wmYdRRZotVCqqboxN0qKiE9kELCpqjE0yGpVm2cZLRdUDGKgTmthtDqOKU9jGXUd1I7EViEalHbILiUalt2OXWcw0xJ7NYuYT9cImH0YVWbZcqai6GTY9i4rOZXNYR3XNsOlFo9Is23WpqHoMdEKKKk5he3cd1Q3DLj8VVXfILiQald6OXWYx9mAWo9P0w+YfRhVZtlypqLoZNj2Lik5n01hHdW2wua2juhLbdamoegx0QooqzmI72KKiMdi1Z6IBHbILSUXV92LXWIw9mJnoBL2xqwijihJbsVRU3Qabm0VFV7CZrKO6q9ms1lFdiW25TDRgDHRCv1HRWWwfr6O6W7NLzkQDumWXE41K78WuMR97JPPRCXpjVxFGFRVs3aJRaRtsbhYVXcFmso7qrmazWkd1WbbfMtGAYfx2Qv8MwG62RUVnsX0cjUpvyi42H43pmV3ROqq7EbvAVOZiex7zmYf0yC7EoqIKtobRqLQBNrEwqriITWYd1V3KphSNStNsp+WjtmAYj05I6zQAu9nrqO4Uto+jUekd2ZXmozGds4uKRqW3YJeWylxsT2I+85B+2eVYVFRiyxiNShtgEwujiovYZKJR6UVsMqmoOsH2WD5T/dwfjGNepVHY/V5HdaewfZyKqm/HLjMTDeifXVcqqu6fXVc0c6U9hvnMQ7pmV7SO6kpsMaNR6dVsVmFUcRGbTCqqvoLNJBqVJtjuykdjBmuGdM2DsFu+jupOYVs5Ew24EbvAfDTmFuzSolFp/+y61lHdlje1BnTOLmod1ZXYekaj0kvZlCwquohNJhVVX8FmEo1KE2x3ZaIBT+oRxqBrHofdeIuKzmK7ORVV34VdXT4acyN2gdGotHN2URYVPdljmIqq+2fXtY7qKtiqrqO669h8LCq6lE0pGpWezqYRjUrTbHeloupv6hHGoGseh937dVR3CtvQmWhA/+y68tGYe7FrTEXV3bLLWUd1da9pld6FXd06qqtgqxqNSi9ik7Go6Go2q2hUei6bQzQqTbCtlYkGfFOPMAZd8zjs3q+julPYhs5EA/pn15WJBtyRXWkqqu6NXUUqc7E9fanMxbdhV7eO6urYwkaj0tPZNNZRXQNsYqmo+hR26mhUmmD7KhMNCKhHGMPjanXdw7AdYFHRiWxnR6PSztlFZaIBN2UXm4qqu2KXkMlcb09fKnPxbdjVraO6Oraw0aj0dDaNdVTXAJtYKqo+np03FVXH2KbKRAMCc38wjuEueGKbwKKic9nmjkal3bLLyUQD7suuNxMN6IRNPhMNqPtPvan0XuwaLSqqZsu7jupOZ9NYR3UNsInlozFHsjNGo9IY21GZaMDYRuyEJrYVwqjiXLa/a6KR/bD5p6Lqu7OrTkXVzbNp56MxA7dBM7vSMKqoZiscjUrPZXNYR3VtsLnlozHHsHNFo9IE21GpqHp4dEIeVZzOdnkxGtYJm3wmGjAAu/BUVN02m3M+GlPxslbdfdn1/kQfb2GLHI1KT2QTWEd1zbDpFaNhe7OzRKPSBNtR0agUdELrqOIKttHz0Zge2Mwz0YAx2LWnouq22Zzz0Rg6oV1fRLbI0aj0RDYBi4paYjOsiUbuxA6eiQbE2F5KRdWgE1pHFVewjZ6PxrTN5pyPxozEViAalbbKZlsTjaQTOr0TmqLqs9jZw6iiMTbJTdEhNrKDVEaDE2wvpaJqDNsJTWxP/EQfX8S2eyYa0DCbcD4aMx5bh2hU2h6bZ000kjboyS7ZoqJqttSpqPpgdtJ1VNcem2eb0VxjbBelomo80Ql59PFFbLvnozGtstnmozFDsqVYR3WNsUkWo2Hf7LmzqOju7KotKtrC1jwVVR/JzmhRUcNswk1FU4yxLZSKqvGNTsijjy9iO74YDWuSTTUTDRiVrUY0Km2GTa8YDftmD906qrs7u2qLirawZc9EA45h51pHdQ2zCbcTzS/G9k8qqkaATsijj69j+74YDWuPzTMVVY/N1iQalTbAJlaMhgXsoVtHdXdnV21R0Ra28vlozAHsROuornk27Raima3Y5klF1ViiE/Lo42bYY5CKqltiM0xF1WOzNYlGpQ2wiWWiASv20FlUNAC7cIuKNrJbkIkGHMBOtI7qemAzvzaa04rtnEw0AEt0Qh593Ax7EmqikVezWUWjUvSzXDalfDRmyZ64dVQ3ALvwdVS3kd2FVFS9NztLNCrth83/qmg2AdswxWgYluiEIlFFG+xJqIwGX8fmE41K8WSLE41Kr2PzKUbDluxxW0d1Y7BrX0d1W9hdyEQD9mPHT0XVXbFLODmaxJJtlWI0DCt0QvGoqAH2PNRH469gM1lHdQjYEkWj0ovYZPLRmBV70KJR6Rjs2qNR6RZ2OzLRgJ3YwVNRdZ/sWg6KTpZl+6QYDcMKnVAyqruaPR6bokOczqZhURFWbKGiUenpbBr5aMyKPWLRqHQktgLrqG4juyk10chX2dGiUSneZpukGA1DDJ1QMqprgL1K6qPx57I5rKM6rNhCZaIBZ7Gz56MxK/Z8paLqkdgKRKPSjezW1EQjt7PjpKJqvMe2RzEahgQ6oVxU2gB7m9RH489iZ49GpYixtcpEA45n581HY2Ls4YpGpYOxRYhGpdvZDdoUHSLN6ovRMLzH9kYxGoY0OqFcVNoGe6fUR+OPZ+dNRdVIsOVKRdUHs5PmozEJ9nBFo9LB2CJEo9KX2G26MJoQXmJbojIajCw6oVxU2hJ7s1RGg49kZ8xEA5Bgy5WPxhzDzlWMhsXYk5WKqgdjixCNSl9it+nCaELYzvZDZTQYJXRCuai0VfaWOSI6U4mNykdjkGYrlo/GHMBOlI/GxNhjlYqqh2RLEY1Kt7M7dVU0G2xk26AyGow643ZCE9s666iuYfauOSE68Tf7tBgNQ5YtWjEath87fjEaFmPPVCYaMCpbjWhUup3dr/OjeWA72wM10UhUG7oTmtgGsqiobfbGaTmaMUps3WqikW+wA9ZH42PsgcpEA8Zma7KO6l5id+3kaBLYyDZAZTQY1UbvhCa2hywqapu9dJqNposKtnT10fiN7CD10fgEe5pSUfXwbFnWUd3b7CYeHZ0V1ey+b4oOgS3ohOiETormii1sDTdFh6hjYyujwQn2KKWiajzZ4lhUtAe7lcdF50M1u+mbokNgIzqhB9tMFhW1zd4+rUWzxHa2klujo2TZkMpocJo9R6moGk+2ONGodD92Z9+Pjovt7F5vig6B7eiEHmw/raO6ttnLqJ1ofniJLWYj0eTS7AnKRAPwZIsTjUpxO3ajK6PBeAOd0INtrGhU2jb7ctVCNDO8x1b12mhOafbsZKIB+GbrE41KcS92lyujwXgPndCD7a1UVH1H9qVur+jo2IOt7YXRhBLsqclEA7BkqxSNSnELdnMro8HYA53Qg+2wTDRgAPbFb2t0FOzKFvn8aB5p9rxkogGIsbVaR3Xon93Z+mg89kAn9GA7LB+NAa5grckJ0Ynr2MOSiqqRYMsVjUrRObutldFg7IROSGyf5aMxwHWsXzkoOlkde0wy0QAk2HJFo1L0ye5mZTQYe6MTEttwxWgYcDXrXfaNzlHBHpBMNABptmKpqBpdsZtYH43HAeiEftm2q4lGAlezDmaX6NAV7LnIR2NQYuu2jurQD7uD9dF4HINOaME2X000EhiSPQ7FaBgq2NJFo1L0wO5dfTQeh6ETcrYFi9EwYDD2INREI1HHVi8VVaNtdtfqo/E4Ep1QhG3EYjQMGIPt/8poMLawNYxGpWiY3bLKaDCORycUZzuyJhoJ3Jpt+8poMDayZYxGpWiV3a/KaDBOQSeUZPtyU3QI4F5sn9dH47GdrWQqqkZL7B5tig6BU9AJ5djW3BQdArgL2+H10Xi8xBYzEw1AA+zW1EfjcS46oQLbpi9EBwL6ZPt5a3QUvMGWNBVVowF2a2qikbgCnVAV27LvREcEemC7d2t0FLzHVjUTDcB17I5URoNxETqhWrZx94qODjTGNurW6CjYiS1vJhqA09mNqI/G4zp0QrVs7x4anRI4i+3Ad6IjYm+2zsVoGM5i618ZDcal6IQ2sB18dHRW4Ei2696Jjohj2GpXRoNxMFv2ymgwrkYntJlt5aui2QAvse30fnRcHMYWvD4aj8PYgldGg9EAOqG32M5uJ5ofEGO75f3ouDierfym6BDYj61wZTQYzaATepdt8cajSWNgtiXej46Ls9j6b42OgvfYqlZGg9EYOqHd2I4n7UR3aGC2ILtEh8bp7EbsFR0dJbZum6JDoDF0QkexB4C0Gd2tm7KL3TE6AS5it4O0H905NIlO6CT2VJAbRLe2MTbJ3aPT4Gp2X0iz0Q1Dw+iErmQPDOk9uq8XscnsEh0aDbNbRlqL7hMaRid0PXtsCGkh2p3ohN0+0kh0e9A2OqHW2XNFyKHRtkOH7FaSy6Mbg+bRCfXNHjxC3ol2FTpnt5WcH90JdIJO6J7ssSSkGG0d3IXdX3JOtProCp0Q7sNeSaQYLRxGYnuA7BWtLzpEJ4Qh2Dtr8GhRAAB0QkCRtRGdRhcDAFiiEwI2syaj8WjSAIAYOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiEAADAuOiGge19//3x+fPwT+vj4+vv55/fXPv+q9h1frx7w5YEAcDg6IaBvYZcR+Pj6+kMnBABFdEJA1xKN0D+fX//+pRMCgCI6IaBrfz/VYUw+/nzpV5/a6T/ohAC0q5VO6Ovvn8dPOvy+Lp+mX/j88+fv4u0O4Feux6ATAoCy6zuhr7+f1v9ETA0R7RCwRicEAO+5thOq6oJ+2Lf+gZGFfyq2Mj8rqf6j2Je8PnD1t9ge39b9+1XRCc0jw6Hfg1UAAMe4sBMKX45Pemd+fzy/VPWZJF6iwHCa64Ryv68JO5z1GUu/I+JbwgCOdFknZH3QR+q3fv6SjL64f/1+edj+HaR3xgIna6wTyk5nwc9YN5JnEsBRLuqEln1Q6SW3eFVG39w/3ulm3hkLXCPX1bzQ0Mw2D1x2M79/pvUV+XbP8oyLkfZd4eU3hXkqARzjmk4ofKHWvOBU//Hx+fnnz8+bMoZOCGPJdTWbG5pvGweGvxx5eBYf5w4Ye+qmVkqfJ+YKAG+6pBNavBn3fbvRCWEsiebkKfVZbszTtoHhr5aPF1YEXU76mQsGRw8OAO+5ohNavBZ3bjrohDCWeHMyS32WG/O0bWDQz8QPlxoY/moVHkwAB7i6E9r71UYnhLGkmpaH1Ge5MU/bBgadUPLJCWqiAytFZwsAb7miEwrff2/1HFtfpOHJ3hkLNOP6Tqh4tIeg6LeGTghAC+iEKtEJoUm5PmRTQxPaNjB4lpKPSTBy20AAOBqdUCXe1GhSrqvZ1tAEwqejPDBVHQpqfkuK8wCAE1z+E9M7vwB/X7jbW5d3xgLXyHUTqc9KHUjyCU0MLLdCiYrwl5MP3XfR899j5sEEsLsrOqG6999rfg9NJ4QRvNIJLZ7AVeuy6IOWn6cOGB4vcsDFx+mBH6uRPjZSAADvuqQTWrxq67qO+X34/E8r/s39k4y/7006IYzgpU5o2ez8/CehY//U32Jg5QEfPc38H0At/jembeDvyMhYHksAh7ikE1q+/ireb/X1dEIYS7I5maQ/8wYko6oT2nJEn+Xvc5fFQwngINd0Qvb2K73jNhTTCWEsr3VCk9y///4R/NvxlZ3Q5Gv9DaVvqQPOIt84WuIHhAAc6KpOyH8Lyb9FD7zi5U5oMv9p2PJBnP94KhgZDiwd8HlIP+Lzz96C380kHuFpLj6Z53SCf5MVAI5wWSe0fKs+Ld96kbd08iUKAADwigs7oUnpm+JL8X+rGgAA4FXXdkKT4o8IPPFzAgAA4ACXd0Iz/YyA90TP/5YaPyYAAAAO0kgnBAAAcAE6IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMC46IQAAMKp///1/G20Mn+7zPnsAAAAASUVORK5CYII=\n", "text/plain": [""]}, "execution_count": 83, "metadata": {}, "output_type": "execute_result"}], "source": ["from pyquickhelper.helpgen import NbImage\n", "NbImage(\"pycpp.png\")"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Ces r\u00e9sultats sont d'une fa\u00e7on g\u00e9n\u00e9rale assez volatile car le temps de calcul est enrob\u00e9 dans plusieurs fonctions Python qui rendent une mesure pr\u00e9cise difficile. Il reste n\u00e9anmoins une bonne id\u00e9e des ordres de grandeurs."]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Random Forest\n", "\n", "On reproduit les m\u00eames r\u00e9sultats pour une random forest mais la r\u00e9\u00e9criture n'est plus aussi simple qu'une r\u00e9gression lin\u00e9aire."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Une pr\u00e9diction \u00e0 la fois"]}, {"cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": ["from sklearn.datasets import load_diabetes\n", "diabetes = load_diabetes()\n", "diabetes_X_train = diabetes.data[:-20]\n", "diabetes_X_test = diabetes.data[-20:]\n", "diabetes_y_train = diabetes.target[:-20]\n", "diabetes_y_test = diabetes.target[-20:]"]}, {"cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [{"data": {"text/plain": ["RandomForestRegressor(n_estimators=10)"]}, "execution_count": 85, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.ensemble import RandomForestRegressor\n", "rf = RandomForestRegressor(n_estimators=10)\n", "rf.fit(diabetes_X_train, diabetes_y_train)"]}, {"cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Moyenne: 980.23 \u00b5s Ecart-type 60.93 \u00b5s (with 20 runs) in [937.55 \u00b5s, 1.11 ms]\n"]}], "source": ["memo_time = []\n", "x = diabetes_X_test[:1]\n", "memo_time.append(timeexe(\"sklearn-rf\", \"rf.predict(x)\", repeat=100, number=20))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["C'est beaucoup plus long que la r\u00e9gression lin\u00e9aire. On essaye avec *onnx*."]}, {"cell_type": "code", "execution_count": 86, "metadata": {"scrolled": false}, "outputs": [], "source": ["if ok_onnx:\n", " onnxrf_model = convert_sklearn(\n", " rf, 'model', [('input', FloatTensorType([None, clr.coef_.shape[0]]))],\n", " target_opset=11)\n", " onnxrf_model.ir_version = 6\n", " save_model(onnxrf_model, 'model_rf.onnx') \n", " model_onnx = onnx.load('model_rf.onnx')"]}, {"cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Input: NodeArg(name='input', type='tensor(float)', shape=[None, 10])\n", "Output: NodeArg(name='variable', type='tensor(float)', shape=[None, 1])\n", "[array([[243.00002]], dtype=float32)]\n", "Moyenne: 14.36 \u00b5s Ecart-type 4.18 \u00b5s (with 20 runs) in [11.75 \u00b5s, 22.22 \u00b5s]\n"]}], "source": ["if ok_onnx:\n", " sess = onnxruntime.InferenceSession(\"model_rf.onnx\")\n", " for i in sess.get_inputs():\n", " print('Input:', i)\n", " for o in sess.get_outputs():\n", " print('Output:', o)\n", " \n", " def predict_onnxrt_rf(x): \n", " return sess.run([\"variable\"], {'input': x})\n", "\n", " print(predict_onnxrt_rf(x.astype(numpy.float32)))\n", " memo_time.append(timeexe(\"onnx-rf\", \"predict_onnxrt_rf(x.astype(numpy.float32))\",\n", " repeat=100, number=20))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["C'est beaucoup plus rapide."]}, {"cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
averagedeviationfirstfirst3last3repeatmin5max5coderun
legend
onnx-rf0.0000140.0000040.0000470.0000270.0000141000.0000120.000022predict_onnxrt_rf(x.astype(numpy.float32))20
sklearn-rf0.0009800.0000610.0013080.0010870.0010751000.0009380.001106rf.predict(x)20
\n", "
"], "text/plain": [" average deviation first first3 last3 repeat \\\n", "legend \n", "onnx-rf 0.000014 0.000004 0.000047 0.000027 0.000014 100 \n", "sklearn-rf 0.000980 0.000061 0.001308 0.001087 0.001075 100 \n", "\n", " min5 max5 code \\\n", "legend \n", "onnx-rf 0.000012 0.000022 predict_onnxrt_rf(x.astype(numpy.float32)) \n", "sklearn-rf 0.000938 0.001106 rf.predict(x) \n", "\n", " run \n", "legend \n", "onnx-rf 20 \n", "sklearn-rf 20 "]}, "execution_count": 89, "metadata": {}, "output_type": "execute_result"}], "source": ["import pandas\n", "df2 = pandas.DataFrame(data=memo_time)\n", "df2 = df2.set_index(\"legend\").sort_values(\"average\")\n", "df2"]}, {"cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [{"name": "stderr", "output_type": "stream", "text": [":5: MatplotlibDeprecationWarning: The 'b' parameter of grid() has been renamed 'visible' since Matplotlib 3.5; support for the old name will be dropped two minor releases later.\n", " ax.grid(b=True, which=\"major\")\n", ":6: MatplotlibDeprecationWarning: The 'b' parameter of grid() has been renamed 'visible' since Matplotlib 3.5; support for the old name will be dropped two minor releases later.\n", " ax.grid(b=True, which=\"minor\");\n"]}, {"data": {"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1wAAAD/CAYAAADlhdcZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAR2klEQVR4nO3dfYxsd1kH8O9jL96SVmigUAtVmsibhmpNK2AscqvRplQgEkHLi1TBimjwpRHKi9pg1VuxpKCVWAwCLS9WUZRAkbcukSglbVMgjYiCxYZabCPFthRIy88/di4uN7u39+7MszOz+/kkk8ycM+c5z2/P7C/7zTlztsYYAQAAYPa+Zd4NAAAAbFcCFwAAQBOBCwAAoInABQAA0ETgAgAAaCJwAQAANNk17wYW3dFHHz2OP/74TW9/55135ogjjpiqh1nUoN9OPU7LNu5F6nere+ncX0ftWdWcts4ifWbY2E4+Tss29kXpdzvNwR31zcGH5pprrrl1jPGgdVeOMTwO8DjppJPGNK688sqptp9VDfrt1OO0bONepH63upfO/XXUnlXNaess0meGje3k47RsY1+UfrfTHNxR3xx8aJJcPTbIEy4pBAAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAE0ELgAAgCY1xph3Dwtt97GPGMc+96JNb3/OCXfnwk/umqqHWdSg3049Tss27kXqd6t76dxfR+1Z1Zy2ziJ9ZtjYTj5Oyzb2Rel3O83BHfXnOQffsPeMbzxfWVnJnj17pu6jW1VdM8Y4eb11znABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAE0ELgAAgCYCFwAAQBOBCwAAoInABQAA0ETgAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADTZdOCqqvOq6rJDXbcIatVfVNUXq+pj8+4HAADYnnbNu4E5OSXJjyU5boxx57ybAQAAtqeluqSwqqYOiJMaD0tyg7AFAAB0OqgAU1UvSfKiJPdLclOSF+63/j5J3pzkW5Ocuc72j0/y6iTfk+RzSX51jLEyWfdzSV6c5LgktyS5YIzxZ5N1e5JcluSPk/x6kvdX1Wcmdb6S5CeT/GeS544xrt6g9/OSPGby/qck+c0kr0lyn6q6I8mFY4zfOZifAwAAsDk3v/Xcg3rfno++6hvPb7vtthx11FHrvm9lZWUGXfWrMcaB31D1qCQfSPK4McZNVXV8ksOSPCfJw5P8QpK/zmpYet4Y455JyHn4GOPZVfXQJJ+YvP+9SX40yduTPHqMcUtVnZHkU0k+m+SHk1yR5JQxxrWTwPWBJBcm+e2snpF7SZJzkzwtyT8kOT/JqWOMx2/Q/3lJXp7k6Un+PsnuJD+d5PljjFM22ObsJGcnyVEPfNBJr3zN6w/4MzqQY+6bfOGuTW8+sxr026nHadnGvUj9bnUvnfvrqD2rmtPWWaTPDBvbycdp2ca+KP1upzm4o37HHPza819+UNscsfv/zwndc889Oeyww9Z930UXXTRtezNz6qmnXjPGOHm9dQdzhuuerIaU76mqW8YYNyRJVSWrZ7zem+TjWT1rtV56e3aS94wx3jN5/f6qujrJk5K8aYzx7jXv/XBVvS/JE5JcO1n29SS/M8b46pr9fmRfvaq6NMmv3csY/nmM8c7J87smNTY0xrgkySVJsvvYR4wLP7n5KxnPOeHuTLP9rGrQb6cep2Ub9yL1u9W9dO6vo/asak5bZ5E+M2xsJx+nZRv7ovS7nebgjvotc/BTLzioba7be8Y3nq+srGTPnj1T9zFP9/odrjHGv2c10JyX5L+r6u1V9ZDJ6scn+d4kezcIW8nq96WeXlW37Xtk9aYVxyZJVZ1eVR+tqv+ZrHtSkqPXbH/LGOMr+9W8ec3zLyc5vKp2VdWzquqOyeOKNe+58d7GCQAAMGsHddOMMcZbJ5ffPSzJSLIvnr4vyR8k+WBVHbPB5jcmuXSMcdSaxxFjjL1VtTvJO5L8UZJjxhhHJXlPkrWnoA58zeM39/mWMcaRk8fpm6kBAAAwK/cauKrqUVX1I5Nw9JUkd2X1Mr8kyRjjD5O8Nauh6+h1SlyW5MlVdVpVHVZVh1fVnqo6Lqs32did1e9/3V1Vpyf58emHBQAAMH8Hc4Zrd5K9SW7N6qV8D07y0rVvGGP8bpJ3JvlAVT1gv3U3JnlqkpdlNVjdmNU7BX7LGOP2rN798PIkX0zyzKze2AIAAGDp3es34cYYn0jy2HVWnbff+16R5BUbrLsqyRM3qH9xkos3WLeS1dvFr122f+0b8s2XIO5f47x1lr0xyRs32gYAAGAWluofHwMAACwTgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAE0ELgAAgCYCFwAAQBOBCwAAoInABQAA0ETgAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAF3GGB4HeJx00kljGldeeeVU28+qBv126nFatnEvUr9b3Uvn/jpqz6rmtHUW6TPDxnbycVq2sS9Kv9tpDu6obw4+NEmuHhvkCWe4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAE12zbuBRffJz38px5/77k1vf84Jd+esKbafVY1FdcPeM+bdAgAAtHGGCwAAoInABQAA0ETgAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAE0ELgAAgCYCFwAAQBOBCwAAoInABQAA0ETgAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAkx0ZuKrqUVV1XVXdXlUvmnc/AADA9rRr3g3MyYuTXDnGOHHejQAAANvXjjrDVVX7AubDklw/z14AAIDtrzVwVdV3V9VKVd1WVddX1VMmy99YVRdX1bsnl/VdVVXftWa7UVUvqKp/m2x7cVXVZN3rquoda957QVV9cN/6dXq4oapeUlWfSHJnVX0oyalJ/qSq7qiqR3b+DAAAgJ2r7ZLCqrpPkncleUOSH09ySpK/q6qTJ2/5mSSnJ7k2yZuS/N5k2T4/keQHktwvyTWTWu9Nck6S66rqrCSfSfK8JCeOMcYB2jkzyRlJbh1j3FVVK0kuG2P8+fQj3To3v/Xcebcwc3s++qp5t7ChlZWVebcAAMCSqwPnlCkKVz0hyV8lecgY4+uTZW9L8q9Jjk9y9xjj+ZPlT0ry6jHGoyevR5InjDE+Mnl9eZJrxxh7J68fl+SKJLcnOXeM8bYD9HFDkleOMd6wZtlKDhC4qursJGcnyVEPfNBJr3zN6zf5U0iOuW/yhbs2vfk31Xjt+S+frtACOmL34n6N8KKLLjqk999xxx058sgje5pZYMs27kXqd6t76dxfR+1Z1Zy2ziJ9ZtjYTj5Oyzb2Rel3O83BHfXNwYfm1FNPvWaMcfJ66zr/2n1Ikhv3ha2JzyV56OT5zWuWfznJ/j/JDdePMa6qqs8meXCSy/ctr6orkjxh8vIXxxhvmTy/8VAaH2NckuSSJNl97CPGhZ/c/I/pnBPuzjTbf1ONp14wVZ1FdN3eM+bdwsysrKxkz549825jyy3buBep363upXN/HbVnVXPaOov0mWFjO/k4LdvYF6Xf7TQHd9Q3B89O53e4bkryHVW1dh/fmeTz0xauql9OsnuyjxfvWz7GOH2MceTk8ZY1m/ScxgMAADiAzsB1VVbPTL24qu5TVXuSPDnJ26cpOrnJxflJnp3kOZP6J07VKQAAQIO2wDXG+FpWA9bpSW5N8qdJfnaM8anN1pzc1v2yJBeMMT4+xvi3JC9LcmlV7Z5B2wAAADPTeseCMcb1SZ64zvKz9nu9kuS4Na/rAO9/7H7rXpfkdQfo4fh1lu05QNsAAAAzsaP+8TEAAMBWErgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGgicAEAADQRuAAAAJoIXAAAAE0ELgAAgCYCFwAAQBOBCwAAoInABQAA0ETgAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKCJwAUAANBE4AIAAGiya94NLLoTHnr/XL33jE1vv7KykhuetWeqHmZRAwAA2HrOcAEAADQRuAAAAJoIXAAAAE0ELgAAgCYCFwAAQBOBCwAAoInABQAA0ETgAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0EbgAAACaCFwAAABNBC4AAIAmAhcAAEATgQsAAKBJjTHm3cNCq6pbknxuihL3T/KlKds4OsmtU9ag3yyO9TJatnEvUr9b3Uvn/jpqz6rmtHXMwcthkX63t9qyjX1R+t1Oc3BHfXPwoXnYGONB660QuJpV1SVjjLOnrHH1GOPkWfVEj1kc62W0bONepH63upfO/XXUnlXNaeuYg5fDIv1ub7VlG/ui9Lud5uCO+ubg2XFJYb93zbsBtsxOPdbLNu5F6nere+ncX0ftWdVcpGNOn518nJdt7IvS73aagzvqm4NnxBmuJbAdkj3AsjIHA8zPdpiDneFaDpfMuwGAHcwcDDA/Sz8HO8MFAADQxBkuAACAJgIXAABAE4FrG6iq46vqlqpamTzW/R8AAPSpqjMn/7sRgC1UVcdU1T9V1Yer6kNVdey8e1pr17wbYGY+PMb4qXk3AbATVdVhSZ6e5MZ59wKwA92a5JQxxter6qwkz0ty/nxb+n/OcG0fP1RV/1hVv19VNe9mAHaYM5P8VZKvz7sRgJ1mjHHPGGPf/PttSa6fZz/7E7i2WFX9SlVdXVVfrao37rfuAVX1t1V1Z1V9rqqeeZBl/yvJw5P8cJIHJ3nabLsG2B465uDJ2a1nJPnLhpYBtpWmv4VTVSdW1VVJfiXJtTNueyouKdx6N2X1FOdpSe6737qLk3wtyTFJTkzy7qr6+Bjj+qr69iRvX6fez4wxbk7y1SSpqr9J8vgk7+hpH2CpzXwOntS6fHIpS1vjANtEy9/CY4zrkjyuqp6R5KVJXtDU/yHzf7jmpKrOT3LcGOOsyesjknwxyWPGGJ+eLLs0yefHGOfeS61vG2PcPnn+B0n+ZYzx5s7+AZbZjOfgC5J8f1YvJ/zBJG8aY7yosX2ApTfjefhbxxhfmzw/LclpY4zf6Oz/UDjDtTgemeTufR+wiY8neeJBbHvK5EP75ST/keS3GvoD2M42PQePMV6y73lVXS1sAWzKNH8Ln1hVf5TkniRfSfLzDf1tmsC1OI5M8r/7LftSVr/4d0BjjCuSXNHRFMAOsek5eK0xxskz6whgZ5nmb+GPZfVeBgvJTTMWxx1J7rffsvsluX0OvQDsNOZggPnatvOwwLU4Pp1kV1U9Ys2y78uC3dYSYJsyBwPM17adhwWuLVZVu6rq8CSHJTmsqg6vql1jjDuT/E2SV1bVEVX1Q0memuTSefYLsJ2YgwHmayfOwwLX1ntFkruSnJvk2ZPnr5ise2FWb4/530neluSXxhhLn+oBFog5GGC+dtw87LbwAAAATZzhAgAAaCJwAQAANBG4AAAAmghcAAAATQQuAACAJgIXAABAE4ELAACgicAFAADQROACAABoInABAAA0+T8d00DhC8yqtQAAAABJRU5ErkJggg==\n", "text/plain": ["
"]}, "metadata": {"needs_background": "light"}, "output_type": "display_data"}], "source": ["fig, ax = plt.subplots(1, 1, figsize=(14,4))\n", "df2[[\"average\", \"deviation\"]].plot(kind=\"barh\", logx=True, ax=ax, xerr=\"deviation\",\n", " legend=False, fontsize=12, width=0.8)\n", "ax.set_ylabel(\"\")\n", "ax.grid(b=True, which=\"major\")\n", "ax.grid(b=True, which=\"minor\");"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Pr\u00e9diction en batch"]}, {"cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["batch = 1\n", "Moyenne: 1.11 ms Ecart-type 145.19 \u00b5s (with 10 runs) in [1.03 ms, 1.54 ms]\n", "Moyenne: 15.70 \u00b5s Ecart-type 13.36 \u00b5s (with 10 runs) in [11.20 \u00b5s, 55.77 \u00b5s]\n", "batch = 10\n", "Moyenne: 1.14 ms Ecart-type 162.36 \u00b5s (with 10 runs) in [952.57 \u00b5s, 1.51 ms]\n", "Moyenne: 25.55 \u00b5s Ecart-type 9.43 \u00b5s (with 10 runs) in [17.37 \u00b5s, 42.15 \u00b5s]\n", "batch = 100\n", "Moyenne: 1.09 ms Ecart-type 80.51 \u00b5s (with 10 runs) in [1.01 ms, 1.31 ms]\n", "Moyenne: 38.04 \u00b5s Ecart-type 17.20 \u00b5s (with 10 runs) in [32.02 \u00b5s, 89.62 \u00b5s]\n", "batch = 200\n", "Moyenne: 1.42 ms Ecart-type 126.30 \u00b5s (with 10 runs) in [1.15 ms, 1.71 ms]\n", "Moyenne: 82.17 \u00b5s Ecart-type 56.27 \u00b5s (with 10 runs) in [43.86 \u00b5s, 213.17 \u00b5s]\n", "batch = 500\n", "Moyenne: 1.79 ms Ecart-type 543.34 \u00b5s (with 10 runs) in [1.31 ms, 3.18 ms]\n", "Moyenne: 130.31 \u00b5s Ecart-type 30.45 \u00b5s (with 10 runs) in [85.15 \u00b5s, 190.08 \u00b5s]\n", "batch = 1000\n", "Moyenne: 1.53 ms Ecart-type 93.12 \u00b5s (with 10 runs) in [1.42 ms, 1.70 ms]\n", "Moyenne: 249.60 \u00b5s Ecart-type 23.96 \u00b5s (with 10 runs) in [232.24 \u00b5s, 312.27 \u00b5s]\n", "batch = 2000\n", "Moyenne: 2.09 ms Ecart-type 149.23 \u00b5s (with 10 runs) in [1.89 ms, 2.33 ms]\n", "Moyenne: 393.37 \u00b5s Ecart-type 165.01 \u00b5s (with 10 runs) in [283.40 \u00b5s, 734.87 \u00b5s]\n", "batch = 3000\n", "Moyenne: 2.77 ms Ecart-type 921.32 \u00b5s (with 10 runs) in [2.24 ms, 5.40 ms]\n", "Moyenne: 432.57 \u00b5s Ecart-type 16.08 \u00b5s (with 10 runs) in [422.71 \u00b5s, 479.76 \u00b5s]\n", "batch = 4000\n", "Moyenne: 2.96 ms Ecart-type 331.99 \u00b5s (with 10 runs) in [2.63 ms, 3.69 ms]\n", "Moyenne: 1.04 ms Ecart-type 485.53 \u00b5s (with 10 runs) in [598.92 \u00b5s, 2.38 ms]\n", "batch = 5000\n", "Moyenne: 3.27 ms Ecart-type 348.48 \u00b5s (with 10 runs) in [3.00 ms, 4.16 ms]\n", "Moyenne: 996.95 \u00b5s Ecart-type 207.84 \u00b5s (with 10 runs) in [767.12 \u00b5s, 1.47 ms]\n", "batch = 10000\n", "Moyenne: 5.26 ms Ecart-type 404.81 \u00b5s (with 10 runs) in [4.96 ms, 6.34 ms]\n", "Moyenne: 1.75 ms Ecart-type 317.18 \u00b5s (with 10 runs) in [1.34 ms, 2.13 ms]\n", "batch = 20000\n", "Moyenne: 10.52 ms Ecart-type 1.11 ms (with 10 runs) in [9.21 ms, 13.42 ms]\n", "Moyenne: 4.40 ms Ecart-type 522.54 \u00b5s (with 10 runs) in [3.52 ms, 5.43 ms]\n", "batch = 50000\n", "Moyenne: 24.33 ms Ecart-type 2.90 ms (with 10 runs) in [21.27 ms, 29.83 ms]\n", "Moyenne: 8.21 ms Ecart-type 1.31 ms (with 10 runs) in [7.32 ms, 11.74 ms]\n", "batch = 75000\n", "Moyenne: 31.54 ms Ecart-type 251.81 \u00b5s (with 10 runs) in [31.19 ms, 32.06 ms]\n", "Moyenne: 12.22 ms Ecart-type 908.21 \u00b5s (with 10 runs) in [11.38 ms, 14.24 ms]\n", "batch = 100000\n", "Moyenne: 42.05 ms Ecart-type 745.44 \u00b5s (with 10 runs) in [41.22 ms, 43.35 ms]\n", "Moyenne: 16.17 ms Ecart-type 1.01 ms (with 10 runs) in [14.98 ms, 17.54 ms]\n"]}], "source": ["memo = []\n", "batch = [1, 10, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000,\n", " 20000, 50000, 75000, 100000, 150000, 200000, 300000, 400000,\n", " 500000, 600000]\n", "number = 10\n", "repeat = 10\n", "for i in batch[:15]:\n", " if i <= diabetes_X_test.shape[0]:\n", " mx = diabetes_X_test[:i]\n", " else:\n", " mxs = [diabetes_X_test] * (i // diabetes_X_test.shape[0] + 1)\n", " mx = numpy.vstack(mxs)\n", " mx = mx[:i]\n", "\n", " print(\"batch\", \"=\", i)\n", " \n", " memo.append(timeexe(\"sklearn.predict %d\" % i, \"rf.predict(mx)\", \n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"sklearn\"\n", " \n", " if ok_onnx:\n", " memo.append(timeexe(\"onnxruntime %d\" % i, \n", " \"predict_onnxrt_rf(mx.astype(numpy.float32))\",\n", " repeat=repeat, number=number))\n", " memo[-1][\"batch\"] = i\n", " memo[-1][\"lib\"] = \"onnxruntime\""]}, {"cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [{"data": {"image/png": "\n", "text/plain": ["
"]}, "metadata": {"needs_background": "light"}, "output_type": "display_data"}], "source": ["dfbrf = pandas.DataFrame(memo)[[\"average\", \"lib\", \"batch\"]]\n", "pivrf = dfbrf.pivot(\"batch\", \"lib\", \"average\")\n", "for c in pivrf.columns:\n", " pivrf[\"ave_\" + c] = pivrf[c] / pivrf.index\n", "libs = list(c for c in pivrf.columns if \"ave_\" in c)\n", "ax = pivrf.plot(y=libs, logy=True, logx=True, figsize=(10, 5))\n", "ax.set_title(\"Evolution du temps de pr\u00e9diction selon la taille du batch\\nrandom forest\")\n", "ax.grid(True);"]}, {"cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5"}}, "nbformat": 4, "nbformat_minor": 2}