{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Measures a vector sum with different accumulator type\n", "\n", "This notebook compares how fast a sum is computed when the accumulator used to store the sum has a different type than the summed elements."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", ""], "text/plain": [""]}, "execution_count": 2, "metadata": {}, "output_type": "execute_result"}], "source": ["from jyquickhelper import add_notebook_menu\n", "add_notebook_menu()"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": ["%matplotlib inline"]}, {"cell_type": "markdown", "metadata": {}, "source": ["[numpy](http://www.numpy.org/) is multithreaded. For an accurate comparison, multithreading needs to be disabled. This can be done as follows or by setting the environment variable ``MKL_NUM_THREADS=1``."]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": ["try:\n", " import mkl\n", " mkl.set_num_threads(1)\n", "except ModuleNotFoundError as e:\n", " print('mkl not found', e)\n", " import os\n", " os.environ['MKL_NUM_THREADS']='1'"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## First comparison\n", "\n", "We compare the following two implementations."]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": ["# float scenario_Float(const float *p1, size_t size)\n", "# {\n", "# float sum = 0;\n", "# const float * end1 = p1 + size;\n", "# for(; p1 != end1; ++p1)\n", "# sum += *p1;\n", "# return sum;\n", "# }\n", "# \n", "# float scenario_Double(const float *p1, size_t size)\n", "# {\n", "# double sum = 0;\n", "# const float * end1 = p1 + size;\n", "# for(; p1 != end1; ++p1)\n", "# sum += *p1;\n", "# return (float)sum;\n", "# }"]}, {"cell_type": "markdown", "metadata": {}, "source": ["The third line is also repeated 10 times so that the loop overhead does not dominate the measurement."]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": ["from cpyquickhelper.numbers.cbenchmark_sum_type import measure_scenario_Double, measure_scenario_Float"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " \n", " average \n", " deviation \n", " doc \n", " label \n", " max_exec \n", " min_exec \n", " name \n", " number \n", " repeat \n", " \n", " \n", " \n", " \n", " 0 \n", " 7.664200e-09 \n", " 7.683250e-08 \n", " a double accumulator \n", " sorted \n", " 7.910000e-07 \n", " 3.950000e-07 \n", " scenario_Double \n", " 100.0 \n", " 100.0 \n", " \n", " \n", " 1 \n", " 5.096300e-09 \n", " 5.378331e-08 \n", " a float accumulator \n", " sorted \n", " 7.910000e-07 \n", " 3.950000e-07 \n", " scenario_Float \n", " 100.0 \n", " 100.0 \n", " \n", " \n", "
\n", "
"], "text/plain": [" average deviation doc label max_exec \\\n", "0 7.664200e-09 7.683250e-08 a double accumulator sorted 7.910000e-07 \n", "1 5.096300e-09 5.378331e-08 a float accumulator sorted 7.910000e-07 \n", "\n", " min_exec name number repeat \n", "0 3.950000e-07 scenario_Double 100.0 100.0 \n", "1 3.950000e-07 scenario_Float 100.0 100.0 "]}, "execution_count": 7, "metadata": {}, "output_type": "execute_result"}], "source": ["import pandas\n", "import numpy\n", "\n", "def test_benchmark(label, values, repeat=100, number=100):\n", " funcs = [(k, v) for k, v in globals().copy().items() if k.startswith(\"measure_scenario\")]\n", " rows = []\n", " for k, v in funcs:\n", " exe = v(values, repeat, number)\n", " d = exe.todict()\n", " d['doc'] = \" \".join(v.__doc__.split('ExecutionStat')[1].strip().split(' ')[-3:]).strip('.')\n", " d['label'] = label\n", " d['name'] = k.replace(\"measure_\", \"\")\n", " rows.append(d) \n", " df = pandas.DataFrame(rows)\n", " return df\n", "\n", "test_benchmark(\"sorted\", numpy.random.rand(10).astype(numpy.float32))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Times are not very conclusive on such small lists."]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " \n", " average \n", " deviation \n", " doc \n", " label \n", " max_exec \n", " min_exec \n", " name \n", " number \n", " repeat \n", " \n", " \n", " \n", " \n", " 0 \n", " 0.000140 \n", " 0.00143 \n", " a double accumulator \n", " sorted \n", " 0.042079 \n", " 0.013098 \n", " scenario_Double \n", " 100.0 \n", " 100.0 \n", " \n", " \n", " 1 \n", " 0.000141 \n", " 0.00143 \n", " a float accumulator \n", " sorted \n", " 0.037468 \n", " 0.013094 \n", " scenario_Float \n", " 100.0 \n", " 100.0 \n", " \n", " \n", "
\n", "
"], "text/plain": [" average deviation doc label max_exec min_exec \\\n", "0 0.000140 0.00143 a double accumulator sorted 0.042079 0.013098 \n", "1 0.000141 0.00143 a float accumulator sorted 0.037468 0.013094 \n", "\n", " name number repeat \n", "0 scenario_Double 100.0 100.0 \n", "1 scenario_Float 100.0 100.0 "]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["values = numpy.random.rand(100000).astype(numpy.float32)\n", "\n", "df = test_benchmark(\"sorted\", values)\n", "df"]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": ["df = pandas.concat([df])\n", "dfg = df[[\"doc\", \"label\", \"average\"]].pivot(\"doc\", \"label\", \"average\")"]}, {"cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [{"data": {"image/png": "\n", "text/plain": [""]}, "metadata": {"needs_background": "light"}, "output_type": "display_data"}], "source": ["ax = dfg.plot.bar(rot=30)\n", "labels = [l.get_text() for l in ax.get_xticklabels()]\n", "ax.set_xticklabels(labels, ha='right')\n", "ax.set_title(\"Comparison of all implementations\");"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## For different sizes"]}, {"cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " \n", " average \n", " deviation \n", " doc \n", " label \n", " max_exec \n", " min_exec \n", " name \n", " number \n", " repeat \n", " size \n", " \n", " \n", " \n", " \n", " 0 \n", " 1.418275e-07 \n", " 0.000001 \n", " a double accumulator \n", " sorted \n", " 0.000026 \n", " 0.000013 \n", " scenario_Double \n", " 100.0 \n", " 20.0 \n", " 100.0 \n", " \n", " \n", " 1 \n", " 1.590125e-07 \n", " 0.000002 \n", " a float accumulator \n", " sorted \n", " 0.000043 \n", " 0.000012 \n", " scenario_Float \n", " 100.0 \n", " 20.0 \n", " 100.0 \n", " \n", " \n", " 0 \n", " 1.380739e-06 \n", " 0.000014 \n", " a double accumulator \n", " sorted \n", " 0.000190 \n", " 0.000127 \n", " scenario_Double \n", " 100.0 \n", " 20.0 \n", " 1000.0 \n", " \n", " \n", " 1 \n", " 1.292245e-06 \n", " 0.000013 \n", " a float accumulator \n", " sorted \n", " 0.000161 \n", " 0.000124 \n", " scenario_Float \n", " 100.0 \n", " 20.0 \n", " 1000.0 \n", " \n", " \n", " 0 \n", " 1.415682e-05 \n", " 0.000142 \n", " a double accumulator \n", " sorted \n", " 0.001969 \n", " 0.001293 \n", " scenario_Double \n", " 100.0 \n", " 20.0 \n", " 10000.0 \n", " \n", " \n", "
\n", "
"], "text/plain": [" average deviation doc label max_exec min_exec \\\n", "0 1.418275e-07 0.000001 a double accumulator sorted 0.000026 0.000013 \n", "1 1.590125e-07 0.000002 a float accumulator sorted 0.000043 0.000012 \n", "0 1.380739e-06 0.000014 a double accumulator sorted 0.000190 0.000127 \n", "1 1.292245e-06 0.000013 a float accumulator sorted 0.000161 0.000124 \n", "0 1.415682e-05 0.000142 a double accumulator sorted 0.001969 0.001293 \n", "\n", " name number repeat size \n", "0 scenario_Double 100.0 20.0 100.0 \n", "1 scenario_Float 100.0 20.0 100.0 \n", "0 scenario_Double 100.0 20.0 1000.0 \n", "1 scenario_Float 100.0 20.0 1000.0 \n", "0 scenario_Double 100.0 20.0 10000.0 "]}, "execution_count": 11, "metadata": {}, "output_type": "execute_result"}], "source": ["dfs = []\n", "\n", "for i in range(2, 7):\n", " n = 10 ** i\n", " values = numpy.random.rand(n).astype(numpy.float32)\n", " df = test_benchmark(\"sorted\", values, repeat=20)\n", " df[\"size\"] = float(n)\n", " dfs.append(df)\n", " \n", "df = pandas.concat(dfs)\n", "df.head()"]}, {"cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " doc \n", " size \n", " a double accumulator \n", " a float accumulator \n", " ratio \n", " \n", " \n", " \n", " \n", " 0 \n", " 100.0 \n", " 1.418275e-07 \n", " 1.590125e-07 \n", " 0.891927 \n", " \n", " \n", " 1 \n", " 1000.0 \n", " 1.380739e-06 \n", " 1.292245e-06 \n", " 1.068480 \n", " \n", " \n", " 2 \n", " 10000.0 \n", " 1.415682e-05 \n", " 1.347988e-05 \n", " 1.050218 \n", " \n", " \n", " 3 \n", " 100000.0 \n", " 1.558953e-04 \n", " 1.326750e-04 \n", " 1.175017 \n", " \n", " \n", " 4 \n", " 1000000.0 \n", " 1.391328e-03 \n", " 1.367595e-03 \n", " 1.017354 \n", " \n", " \n", "
\n", "
"], "text/plain": ["doc size a double accumulator a float accumulator ratio\n", "0 100.0 1.418275e-07 1.590125e-07 0.891927\n", "1 1000.0 1.380739e-06 1.292245e-06 1.068480\n", "2 10000.0 1.415682e-05 1.347988e-05 1.050218\n", "3 100000.0 1.558953e-04 1.326750e-04 1.175017\n", "4 1000000.0 1.391328e-03 1.367595e-03 1.017354"]}, "execution_count": 12, "metadata": {}, "output_type": "execute_result"}], "source": ["piv = df.pivot(\"size\", \"doc\", \"average\")\n", "cols = piv.columns\n", "piv = piv.reset_index(drop=False)\n", "piv[\"ratio\"] = piv[\"a double accumulator\"] / piv[\"a float accumulator\"]\n", "piv"]}, {"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [{"data": {"image/png": "\n", "text/plain": [""]}, "metadata": {"needs_background": "light"}, "output_type": "display_data"}], "source": ["ax = piv.plot(x='size', y=cols, logy=True, logx=True)\n", "ax.set_title(\"Compares float and double accumulator for a sum.\");"]}, {"cell_type": "markdown", "metadata": {}, "source": ["There is almost no difference."]}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2"}}, "nbformat": 4, "nbformat_minor": 2}