{ "cells": [ { "attachments": {}, "cell_type": "markdown", "id": "96d52364-7a21-4cab-ad30-0f0144667563", "metadata": {}, "source": [ "# Performances of 2D integration vs 1D integration\n", "\n", "This depends on:\n", "* Number of azimuthal bins\n", "* Pixel splitting\n", "* Algorithm\n", "* Implementation (i.e. programming language)\n", "* Hardware used\n", "\n", "Thus there is no general answer. But here is a quick benchmark to evaluate the performance penalty:\n", "\n", "```python\n", "import sys\n", "import os\n", "import time\n", "import numpy\n", "import fabio\n", "import pyFAI\n", "from pyFAI.test.utilstest import UtilsTest\n", "import pyFAI.method_registry\n", "import pyFAI.integrator.azimuthal\n", "print(f\"Python version: {sys.version}\")\n", "print(f\"PyFAI version: {pyFAI.version}\")\n", "start_time = time.perf_counter()\n", "```" ] }, { "cell_type": "code", "execution_count": 1, "id": "332f573a-e5b6-4865-aff7-def54aa54faa", "metadata": {}, "outputs": [], "source": [ "import sys\n", "import os\n", "import time\n", "import numpy\n", "\n", "os.environ[\"PYOPENCL_COMPILER_OUTPUT\"] = \"0\"\n", "start_time = time.perf_counter()" ] }, { "cell_type": "code", "execution_count": 2, "id": "367b3b78-04d9-4f47-85e5-0f11c4383a70", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python version: 3.13.1 | packaged by conda-forge | (main, Jan 13 2025, 09:53:10) [GCC 13.3.0]\n", "PyFAI version: 2025.12.0\n" ] } ], "source": [ "import fabio\n", "import pyFAI\n", "from pyFAI.test.utilstest import UtilsTest\n", "import pyFAI.method_registry\n", "import pyFAI.integrator.azimuthal\n", "print(f\"Python version: {sys.version}\")\n", "print(f\"PyFAI version: {pyFAI.version}\")\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "b3a6bd65-9b17-4c20-ad4a-870242eeed1b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of way to performing integration: 95\n" ] } ], "source": [ "print(\"Number of way to 
performing integration:\", len(pyFAI.method_registry.IntegrationMethod.list_available()))" ] }, { "cell_type": "code", "execution_count": 4, "id": "1479c126-f758-42e0-9209-3c6af91ada5a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Detector Pilatus 1M\t PixelSize= 172µm, 172µm\t BottomRight (3)\n", "Wavelength= 1.000000 Å\n", "SampleDetDist= 1.583231e+00 m\tPONI= 3.341702e-02, 4.122778e-02 m\trot1=0.006487 rot2=0.007558 rot3=0.000000 rad\n", "DirectBeamDist= 1583.310 mm\tCenter: x=179.981, y=263.859 pix\tTilt= 0.571° tiltPlanRotation= 130.640° λ= 1.000Å" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ai = pyFAI.load(UtilsTest.getimage(\"Pilatus1M.poni\"))\n", "img = fabio.open(UtilsTest.getimage(\"Pilatus1M.edf\")).data\n", "ai" ] }, { "cell_type": "code", "execution_count": 5, "id": "aea1ab6d-afc5-4152-a60b-8b25dc7d460b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Method(dim=1, split='no', algo='histogram', impl='python', target=None)\n", "30.1 ms ± 436 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", "Method(dim=2, split='no', algo='histogram', impl='python', target=None)\n", "115 ms ± 734 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", "Method(dim=1, split='no', algo='histogram', impl='cython', target=None)\n", "11.2 ms ± 29.2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='histogram', impl='cython', target=None)\n", "16.7 ms ± 1.16 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='histogram', impl='cython', target=None)\n", "26.1 ms ± 94.7 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", "Method(dim=2, split='bbox', algo='histogram', impl='cython', target=None)\n", "32.5 ms ± 48.4 μs per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n", "Method(dim=1, split='full', algo='histogram', impl='cython', target=None)\n", "169 ms ± 3.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", "Method(dim=2, split='full', algo='histogram', impl='cython', target=None)\n", "319 ms ± 3.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=2, split='pseudo', algo='histogram', impl='cython', target=None)\n", "358 ms ± 2.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='csr', impl='cython', target=None)\n", "8.54 ms ± 804 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='csr', impl='cython', target=None)\n", "16.8 ms ± 6.51 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csr', impl='cython', target=None)\n", "10.7 ms ± 4.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='csr', impl='cython', target=None)\n", "12.2 ms ± 2.75 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='no', algo='csr', impl='python', target=None)\n", "10.4 ms ± 325 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='csr', impl='python', target=None)\n", "14.7 ms ± 247 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csr', impl='python', target=None)\n", "14.2 ms ± 389 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='csr', impl='python', target=None)\n", "16.6 ms ± 106 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='csc', impl='cython', target=None)\n", "8.14 ms ± 30.4 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='csc', impl='cython', target=None)\n", "10.6 ms ± 26.2 μs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csc', impl='cython', target=None)\n", "10.5 ms ± 59.6 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='csc', impl='cython', target=None)\n", "13.9 ms ± 69.6 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='no', algo='csc', impl='python', target=None)\n", "10.9 ms ± 13 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='csc', impl='python', target=None)\n", "14.2 ms ± 31 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csc', impl='python', target=None)\n", "14.7 ms ± 22.3 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='csc', impl='python', target=None)\n", "21.6 ms ± 34.2 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='bbox', algo='lut', impl='cython', target=None)\n", "10.4 ms ± 3.01 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='lut', impl='cython', target=None)\n", "15.7 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='lut', impl='cython', target=None)\n", "11.9 ms ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='lut', impl='cython', target=None)\n", "10.9 ms ± 3.15 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='full', algo='lut', impl='cython', target=None)\n", "16.6 ms ± 5.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=2, split='full', algo='lut', impl='cython', target=None)\n", "11.6 ms ± 170 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csr', impl='cython', target=None)\n", "12.7 ms ± 4.05 ms per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='csr', impl='cython', target=None)\n", "21.1 ms ± 2.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csr', impl='python', target=None)\n", "12.8 ms ± 11.1 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='csr', impl='python', target=None)\n", "17.1 ms ± 82 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csc', impl='cython', target=None)\n", "10.3 ms ± 9.66 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='csc', impl='cython', target=None)\n", "14.1 ms ± 20.8 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csc', impl='python', target=None)\n", "15 ms ± 68.6 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='csc', impl='python', target=None)\n", "22 ms ± 42.7 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='histogram', impl='opencl', target=(0, 0))\n", "9.41 ms ± 5.63 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='histogram', impl='opencl', target=(0, 0))\n", "2.71 ms ± 13 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='no', algo='histogram', impl='opencl', target=(0, 1))\n", "8.48 ms ± 24.7 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='histogram', impl='opencl', target=(0, 1))\n", "4.22 ms ± 9.5 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='no', algo='histogram', impl='opencl', target=(1, 0))\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "1 error generated.\n", "WARNING:pyFAI.opencl.azim_hist:Your OpenCL compiler wrongly claims it support 64-bit atomics. 
Degrading to 32 bits atomics!\n", "1 error generated.\n", "WARNING:pyFAI.opencl.azim_hist:Your OpenCL compiler wrongly claims it support 64-bit atomics. Degrading to 32 bits atomics!\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "14.9 ms ± 523 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=2, split='no', algo='histogram', impl='opencl', target=(1, 0))\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/users/kieffer/.venv/py313/lib/python3.13/site-packages/pyopencl/cache.py:496: CompilerWarning: Non-empty compiler output encountered. Set the environment variable PYOPENCL_COMPILER_OUTPUT=1 to see more.\n", " _create_built_program_from_source_cached(\n", "/users/kieffer/.venv/py313/lib/python3.13/site-packages/pyopencl/cache.py:500: CompilerWarning: Non-empty compiler output encountered. Set the environment variable PYOPENCL_COMPILER_OUTPUT=1 to see more.\n", " prg.build(options_bytes, devices)\n", "WARNING:pyFAI.opencl.azim_hist:Your OpenCL compiler wrongly claims it support 64-bit atomics. Degrading to 32 bits atomics!\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "11.2 ms ± 960 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='histogram', impl='opencl', target=(2, 0))\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:pyFAI.opencl.azim_hist:Your OpenCL compiler wrongly claims it support 64-bit atomics. Degrading to 32 bits atomics!\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "11.5 ms ± 254 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='histogram', impl='opencl', target=(2, 0))\n", "7.7 ms ± 220 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csr', impl='opencl', target=(0, 0))\n", "704 μs ± 2.68 μs per loop (mean ± std. dev. 
of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='bbox', algo='csr', impl='opencl', target=(0, 0))\n", "2.68 ms ± 69.8 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='csr', impl='opencl', target=(0, 0))\n", "660 μs ± 1.57 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='no', algo='csr', impl='opencl', target=(0, 0))\n", "2.59 ms ± 14.1 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csr', impl='opencl', target=(0, 1))\n", "1.22 ms ± 3.48 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='bbox', algo='csr', impl='opencl', target=(0, 1))\n", "6.12 ms ± 33.3 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='csr', impl='opencl', target=(0, 1))\n", "1.08 ms ± 1.54 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='no', algo='csr', impl='opencl', target=(0, 1))\n", "6.06 ms ± 12 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csr', impl='opencl', target=(1, 0))\n", "3.78 ms ± 40.2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='csr', impl='opencl', target=(1, 0))\n", "7.71 ms ± 208 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='csr', impl='opencl', target=(1, 0))\n", "3.06 ms ± 57.4 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='csr', impl='opencl', target=(1, 0))\n", "6.21 ms ± 95.1 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=1, split='bbox', algo='csr', impl='opencl', target=(2, 0))\n", "3.94 ms ± 1.22 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=2, split='bbox', algo='csr', impl='opencl', target=(2, 0))\n", "82.4 ms ± 118 μs per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='csr', impl='opencl', target=(2, 0))\n", "2.38 ms ± 398 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=2, split='no', algo='csr', impl='opencl', target=(2, 0))\n", "84.1 ms ± 2.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csr', impl='opencl', target=(0, 0))\n", "705 μs ± 1.02 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='full', algo='csr', impl='opencl', target=(0, 0))\n", "2.62 ms ± 80.4 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csr', impl='opencl', target=(0, 1))\n", "1.22 ms ± 804 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='full', algo='csr', impl='opencl', target=(0, 1))\n", "6.11 ms ± 39 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csr', impl='opencl', target=(1, 0))\n", "4.2 ms ± 22.4 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='csr', impl='opencl', target=(1, 0))\n", "7.68 ms ± 304 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='csr', impl='opencl', target=(2, 0))\n", "3.41 ms ± 698 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=2, split='full', algo='csr', impl='opencl', target=(2, 0))\n", "83 ms ± 802 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='bbox', algo='lut', impl='opencl', target=(0, 0))\n", "3.18 ms ± 1.02 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='lut', impl='opencl', target=(0, 0))\n", "301 ms ± 4.76 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='lut', impl='opencl', target=(0, 0))\n", "1.59 ms ± 2.57 μs per loop (mean ± std. dev. 
of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='no', algo='lut', impl='opencl', target=(0, 0))\n", "181 ms ± 2.71 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='bbox', algo='lut', impl='opencl', target=(0, 1))\n", "3.19 ms ± 39.5 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='lut', impl='opencl', target=(0, 1))\n", "304 ms ± 11.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='lut', impl='opencl', target=(0, 1))\n", "1.82 ms ± 13 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", "Method(dim=2, split='no', algo='lut', impl='opencl', target=(0, 1))\n", "182 ms ± 774 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='bbox', algo='lut', impl='opencl', target=(1, 0))\n", "4.63 ms ± 24 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='lut', impl='opencl', target=(1, 0))\n", "170 ms ± 2.29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='lut', impl='opencl', target=(1, 0))\n", "3.68 ms ± 30.6 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='lut', impl='opencl', target=(1, 0))\n", "139 ms ± 2.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='bbox', algo='lut', impl='opencl', target=(2, 0))\n", "4.07 ms ± 164 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='bbox', algo='lut', impl='opencl', target=(2, 0))\n", "212 ms ± 1.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='no', algo='lut', impl='opencl', target=(2, 0))\n", "2.83 ms ± 223 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='no', algo='lut', impl='opencl', target=(2, 0))\n", "182 ms ± 3.39 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='lut', impl='opencl', target=(0, 0))\n", "2.6 ms ± 2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='lut', impl='opencl', target=(0, 0))\n", "298 ms ± 2.01 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='lut', impl='opencl', target=(0, 1))\n", "2.78 ms ± 152 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='lut', impl='opencl', target=(0, 1))\n", "301 ms ± 1.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='lut', impl='opencl', target=(1, 0))\n", "4.64 ms ± 18.7 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='lut', impl='opencl', target=(1, 0))\n", "168 ms ± 1.24 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", "Method(dim=1, split='full', algo='lut', impl='opencl', target=(2, 0))\n", "3.49 ms ± 177 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Method(dim=2, split='full', algo='lut', impl='opencl', target=(2, 0))\n", "215 ms ± 2.96 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n", "CPU times: user 1h 50min 47s, sys: 7min 40s, total: 1h 58min 28s\n", "Wall time: 7min 41s\n" ] } ], "source": [ "%%time\n", "#Tune those parameters to match your needs:\n", "kw1 = {\"data\": img, \"npt\":1000}\n", "kw2 = {\"data\": img, \"npt_rad\":1000}\n", "#Actual benchmark:\n", "res = {}\n", "for k,v in pyFAI.method_registry.IntegrationMethod._registry.items():\n", " print(k)\n", " if k.dim == 1:\n", " res[k] = %timeit -o ai.integrate1d(method=v, **kw1)\n", " else:\n", " res[k] = %timeit -o ai.integrate2d(method=v, **kw2)" ] }, { "cell_type": "code", "execution_count": 6, "id": "cf37152a-27df-4105-a372-7d8d21366fc7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--------------------------------------------------------------------------------\n", "Split | Algo | Impl | 1d (ms) | 2d (ms) | ratio | Device\n", "--------------------------------------------------------------------------------\n", "no | histogram | python| 29.819 | 113.944 | 3.8 | \n", "no | histogram | cython| 11.131 | 16.140 | 1.5 | \n", "bbox | histogram | cython| 25.982 | 32.473 | 1.2 | \n", "full | histogram | cython| 166.489 | 313.909 | 1.9 | \n", "no | csr | cython| 7.103 | 7.166 | 1.0 | \n", "bbox | csr | cython| 7.468 | 8.253 | 1.1 | \n", "no | csr | python| 9.785 | 14.210 | 1.5 | \n", "bbox | csr | python| 13.559 | 16.487 | 1.2 | \n", "no | csc | cython| 8.101 | 10.589 | 1.3 | \n", "bbox | csc | cython| 10.463 | 13.837 | 1.3 | \n", "no | csc | python| 10.874 | 14.124 | 1.3 | \n", "bbox | csc | python| 14.671 | 21.500 | 1.5 | \n", "bbox | lut | cython| 7.437 | 11.805 | 1.6 | \n", "no | lut | cython| 8.069 | 7.759 | 1.0 | \n", "full | lut | cython| 7.155 | 11.516 | 1.6 | \n", "full | csr | cython| 8.101 | 16.944 | 2.1 | \n", "full | csr | python| 12.732 | 16.990 | 1.3 | \n", "full | csc | cython| 10.287 | 14.022 | 1.4 | \n", "full | csc | python| 14.935 | 21.987 | 1.5 | \n", "no | histogram | opencl| 9.404 | 2.698 | 0.3 | 
NVIDIA CUDA / NVIDIA RTX A5000\n", "no | histogram | opencl| 8.459 | 4.209 | 0.5 | NVIDIA CUDA / Quadro P2200\n", "no | histogram | opencl| 14.044 | 10.007 | 0.7 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "no | histogram | opencl| 11.260 | 7.409 | 0.7 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "bbox | csr | opencl| 0.702 | 2.646 | 3.8 | NVIDIA CUDA / NVIDIA RTX A5000\n", "no | csr | opencl| 0.658 | 2.571 | 3.9 | NVIDIA CUDA / NVIDIA RTX A5000\n", "bbox | csr | opencl| 1.217 | 6.083 | 5.0 | NVIDIA CUDA / Quadro P2200\n", "no | csr | opencl| 1.076 | 6.044 | 5.6 | NVIDIA CUDA / Quadro P2200\n", "bbox | csr | opencl| 3.725 | 7.421 | 2.0 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "no | csr | opencl| 2.993 | 5.990 | 2.0 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "bbox | csr | opencl| 2.915 | 82.315 | 28.2 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "no | csr | opencl| 2.003 | 82.015 | 41.0 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "full | csr | opencl| 0.704 | 2.565 | 3.6 | NVIDIA CUDA / NVIDIA RTX A5000\n", "full | csr | opencl| 1.219 | 6.083 | 5.0 | NVIDIA CUDA / Quadro P2200\n", "full | csr | opencl| 4.180 | 7.451 | 1.8 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "full | csr | opencl| 2.789 | 82.252 | 29.5 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "bbox | lut | opencl| 3.174 | 297.997 | 93.9 | NVIDIA CUDA / NVIDIA RTX A5000\n", "no | lut | opencl| 1.590 | 178.964 | 112.6 | NVIDIA CUDA / NVIDIA RTX A5000\n", "bbox | lut | opencl| 3.149 | 298.830 | 94.9 | NVIDIA CUDA / Quadro P2200\n", "no | lut | opencl| 1.811 | 181.036 | 100.0 | NVIDIA CUDA / Quadro P2200\n", "bbox | lut | opencl| 4.596 | 167.232 | 36.4 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 
32-Cores\n", "no | lut | opencl| 3.617 | 135.159 | 37.4 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "bbox | lut | opencl| 3.778 | 210.177 | 55.6 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "no | lut | opencl| 2.601 | 178.323 | 68.5 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "full | lut | opencl| 2.600 | 294.105 | 113.1 | NVIDIA CUDA / NVIDIA RTX A5000\n", "full | lut | opencl| 2.702 | 299.181 | 110.7 | NVIDIA CUDA / Quadro P2200\n", "full | lut | opencl| 4.621 | 166.068 | 35.9 | Portable Computing Language / cpu-haswell-AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "full | lut | opencl| 3.341 | 213.092 | 63.8 | Intel(R) OpenCL / AMD Ryzen Threadripper PRO 3975WX 32-Cores\n", "--------------------------------------------------------------------------------\n" ] } ], "source": [ "print(\"-\"*80)\n", "print(f\"{'Split':5s} | {'Algo':9s} | {'Impl':6s}| {'1d (ms)':8s} | {'2d (ms)':8s} | {'ratio':6s} | Device\")\n", "print(\"-\"*80)\n", "for k in res:\n", " if k.dim == 1:\n", " k1 = k\n", " k2 = k._replace(dim=2)\n", " if k2 in res:\n", " print(f\"{k1.split:5s} | {k1.algo:9s} | {k1.impl:6s}| {res[k1].best*1000:8.3f} | {res[k2].best*1000:8.3f} | {res[k2].best/res[k1].best:6.1f} | \",\n", " end=\"\")\n", " if k.target:\n", " print(pyFAI.method_registry.IntegrationMethod._registry.get(k).target_name)\n", " else:\n", " print()\n", "print(\"-\"*80)" ] }, { "cell_type": "code", "execution_count": 7, "id": "4d1e3ab4-aabd-4429-b1b2-818eb1f386a9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total runtime: 462.969s\n" ] } ], "source": [ "print(f\"Total runtime: {time.perf_counter()-start_time:.3f}s\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", 
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.1" } }, "nbformat": 4, "nbformat_minor": 5 }