{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Converts a logistic regression into C\n", "\n", "The logistic regression is trained in python and executed in C."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"data": {"text/html": ["<div id=\"my_id_menu_nb\">run previous cell, wait for 2 seconds</div>\n", "<script>\n", "function repeat_indent_string(n){\n", "    var a = \"\" ;\n", "    for ( ; n > 0 ; --n)\n", "        a += \"    \";\n", "    return a;\n", "}\n", "// look up into all sections and builds an automated menu //\n", "var update_menu_string = function(begin, lfirst, llast, sformat, send, keep_item, begin_format, end_format) {\n", "    var anchors = document.getElementsByClassName(\"section\");\n", "    if (anchors.length == 0) {\n", "        anchors = document.getElementsByClassName(\"text_cell_render rendered_html\");\n", "    }\n", "    var i,t;\n", "    var text_menu = begin;\n", "    var text_memo = \"<pre>\\nlength:\" + anchors.length + \"\\n\";\n", "    var ind = \"\";\n", "    var memo_level = 1;\n", "    var href;\n", "    var tags = [];\n", "    var main_item = 0;\n", "    var format_open = 0;\n", "    for (i = 0; i <= llast; i++)\n", "        tags.push(\"h\" + i);\n", "\n", "    for (i = 0; i < anchors.length; i++) {\n", "        text_memo += \"**\" + anchors[i].id + \"--\\n\";\n", "\n", "        var child = null;\n", "        for(t = 0; t < tags.length; t++) {\n", "            var r = anchors[i].getElementsByTagName(tags[t]);\n", "            if (r.length > 0) {\n", "child = r[0];\n", "break;\n", "            }\n", "        }\n", "        if (child == null) {\n", "            text_memo += \"null\\n\";\n", "            continue;\n", "        }\n", "        if (anchors[i].hasAttribute(\"id\")) {\n", "            // when converted in RST\n", "            href = anchors[i].id;\n", "            text_memo += \"#1-\" + href;\n", "            // passer \u00e0 child suivant (le chercher)\n", "        
}\n", "        else if (child.hasAttribute(\"id\")) {\n", "            // in a notebook\n", "            href = child.id;\n", "            text_memo += \"#2-\" + href;\n", "        }\n", "        else {\n", "            text_memo += \"#3-\" + \"*\" + \"\\n\";\n", "            continue;\n", "        }\n", "        var title = child.textContent;\n", "        var level = parseInt(child.tagName.substring(1,2));\n", "\n", "        text_memo += \"--\" + level + \"?\" + lfirst + \"--\" + title + \"\\n\";\n", "\n", "        if ((level < lfirst) || (level > llast)) {\n", "            continue ;\n", "        }\n", "        if (title.endsWith('\u00b6')) {\n", "            title = title.substring(0,title.length-1).replace(\"<\", \"&lt;\")\n", "         .replace(\">\", \"&gt;\").replace(\"&\", \"&amp;\");\n", "        }\n", "        if (title.length == 0) {\n", "            continue;\n", "        }\n", "\n", "        while (level < memo_level) {\n", "            text_menu += end_format + \"</ul>\\n\";\n", "            format_open -= 1;\n", "            memo_level -= 1;\n", "        }\n", "        if (level == lfirst) {\n", "            main_item += 1;\n", "        }\n", "        if (keep_item != -1 && main_item != keep_item + 1) {\n", "            // alert(main_item + \" - \" + level + \" - \" + keep_item);\n", "            continue;\n", "        }\n", "        while (level > memo_level) {\n", "            text_menu += \"<ul>\\n\";\n", "            memo_level += 1;\n", "        }\n", "        text_menu += repeat_indent_string(level-2);\n", "        text_menu += begin_format + sformat.replace(\"__HREF__\", href).replace(\"__TITLE__\", title);\n", "        format_open += 1;\n", "    }\n", "    while (1 < memo_level) {\n", "        text_menu += end_format + \"</ul>\\n\";\n", "        memo_level -= 1;\n", "        format_open -= 1;\n", "    }\n", "    text_menu += send;\n", "    //text_menu += \"\\n\" + text_memo;\n", "\n", "    while (format_open > 0) {\n", "        text_menu += 
end_format;\n", "        format_open -= 1;\n", "    }\n", "    return text_menu;\n", "};\n", "var update_menu = function() {\n", "    var sbegin = \"\";\n", "    var sformat = '<a href=\"#__HREF__\">__TITLE__</a>';\n", "    var send = \"\";\n", "    var begin_format = '<li>';\n", "    var end_format = '</li>';\n", "    var keep_item = -1;\n", "    var text_menu = update_menu_string(sbegin, 2, 4, sformat, send, keep_item,\n", "       begin_format, end_format);\n", "    var menu = document.getElementById(\"my_id_menu_nb\");\n", "    menu.innerHTML=text_menu;\n", "};\n", "window.setTimeout(update_menu,2000);\n", "            </script>"], "text/plain": ["<IPython.core.display.HTML object>"]}, "execution_count": 2, "metadata": {}, "output_type": "execute_result"}], "source": ["from jyquickhelper import add_notebook_menu\n", "add_notebook_menu()"]}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["## Train a logistic regression"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [{"data": {"text/plain": ["LogisticRegression()"]}, "execution_count": 3, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.linear_model import LogisticRegression\n", "from sklearn.datasets import load_iris\n", "iris = load_iris()\n", "X = iris.data[:, :2]\n", "y = iris.target\n", "y[y == 2] = 1\n", "lr = LogisticRegression()\n", "lr.fit(X, y)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Export into C"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [{"data": {"text/plain": ["<mlprodict.grammar.gmlactions.MLModel at 0x21564d49828>"]}, "execution_count": 4, "metadata": {}, "output_type": "execute_result"}], "source": ["# grammar is the expected scoring model.\n", "from mlprodict.grammar_sklearn import sklearn2graph\n", "gr = sklearn2graph(lr, output_names=['Prediction', 'Score'])\n", "gr"]}, {"cell_type": "markdown", "metadata": {}, "source": ["We can even check what the function should 
produce as a score. Types are strict."]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["[  0.       -11.264062]\n"]}], "source": ["import numpy\n", "X = numpy.array([[numpy.float32(1), numpy.float32(2)]])\n", "e2 = gr.execute(Features=X[0, :])\n", "print(e2)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["We compare with scikit-learn."]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([-11.26406172])"]}, "execution_count": 6, "metadata": {}, "output_type": "execute_result"}], "source": ["lr.decision_function(X[0:1, :])"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Conversion into C:"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["int LogisticRegression (float* pred, float* Features)\n", "{\n", "    // 2290909222952-LogisticRegression - children\n", "    // 2290909222728-concat - children\n", "    // 2290909222672-sign - children\n", "    // 2290909222616-+ - children\n", "    // 2290909222560-adot - children\n", "    float pred0c0c00c0[2] = {(float)3.3882975578308105, (float)-3.164527654647827};\n", "    float* pred0c0c00c1 = Features;\n", "    // 2290909222560-adot - itself\n", "    float pred0c0c00;\n", "    adot_float_float(&pred0c0c00, pred0c0c00c0, pred0c0c00c1, 2);\n", "    // 2290909222560-adot - done\n", "    float pred0c0c01 = (float)-8.323304176330566;\n", "    // 2290909222616-+ - itself\n", "    float pred0c0c0 = pred0c0c00 + pred0c0c01;\n", "    // 2290909222616-+ - done\n", "    // 2290909222672-sign - itself\n", "    float pred0c0;\n", "    sign_float(&pred0c0, pred0c0c0);\n", "    // 2290909222672-sign - done\n", "    // 2290909222728-concat - itself\n", "    float pred0[2];\n", "    concat_float_float(pred0, pred0c0, pred0c0c0);\n", "    // 2290909222728-concat - done\n", "    memcpy(pred, pred0, 
2*sizeof(float));\n", "    // 2290909222952-LogisticRegression - itself\n", "    return 0;\n", "    // 2290909222952-LogisticRegression - done\n", "}\n"]}], "source": ["res = gr.export(lang='c', hook={'array': lambda v: v.tolist(), 'float32': lambda v: float(v)})\n", "print(res[\"code\"])"]}, {"cell_type": "markdown", "metadata": {}, "source": ["We execute the code with the module [cffi](https://cffi.readthedocs.io/en/latest/)."]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"data": {"text/plain": ["<function mlprodict.grammar_sklearn.cc.c_compilation.compile_c_function.<locals>.wrapper_float(features, output=None)>"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["from mlprodict.grammar_sklearn.cc import compile_c_function\n", "fct = compile_c_function(res[\"code\"], 2)\n", "fct"]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([  0.      , -11.264062], dtype=float32)"]}, "execution_count": 9, "metadata": {}, "output_type": "execute_result"}], "source": ["e2 = fct(X[0, :])\n", "e2"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Time comparison"]}, {"cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["64.9 \u00b5s \u00b1 5.84 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 10000 loops each)\n"]}], "source": ["%timeit lr.decision_function(X[0:1, :])"]}, {"cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["6.17 \u00b5s \u00b1 380 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["%timeit fct(X[0, :])"]}, {"cell_type": "markdown", "metadata": {}, "source": ["There is a significant speedup on this example. It could be even faster by removing some of the Python wrapper code and optimizing the code produced by [cffi](https://cffi.readthedocs.io/en/latest/). 
We can also save the creation of the array which contains the output by reusing an existing one."]}, {"cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": ["out = fct(X[0, :])"]}, {"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["6.33 \u00b5s \u00b1 430 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["%timeit fct(X[0, :], out)"]}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2"}}, "nbformat": 4, "nbformat_minor": 2}