#!/usr/bin/env python
# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/
"""Module for (filtered) backprojection on the GPU"""
from __future__ import absolute_import, print_function, with_statement, division
__authors__ = ["A. Mirone, P. Paleo"]
__license__ = "MIT"
__date__ = "25/01/2019"
import logging
import numpy as np
from .common import pyopencl
from .processing import EventDescription, OpenclProcessing, BufferDescription
from .sinofilter import SinoFilter
from .sinofilter import fourier_filter as fourier_filter_
from ..utils.deprecation import deprecated
if pyopencl:
mf = pyopencl.mem_flags
import pyopencl.array as parray
else:
raise ImportError("Please install pyopencl in order to use opencl backprojection")
logger = logging.getLogger(__name__)
def _sizeof(Type):
"""
return the size (in bytes) of a scalar type, like the C behavior
"""
return np.dtype(Type).itemsize
def _idivup(a, b):
"""
return the integer division, plus one if `a` is not a multiple of `b`
"""
return (a + (b - 1)) // b
[docs]class Backprojection(OpenclProcessing):
"""A class for performing the backprojection using OpenCL"""
kernel_files = ["backproj.cl", "array_utils.cl"]
def __init__(self, sino_shape, slice_shape=None, axis_position=None,
angles=None, filter_name=None, ctx=None, devicetype="all",
platformid=None, deviceid=None, profile=False,
extra_options=None):
"""Constructor of the OpenCL (filtered) backprojection
:param sino_shape: shape of the sinogram. The sinogram is in the format
(n_b, n_a) where n_b is the number of detector bins
and n_a is the number of angles.
:param slice_shape: Optional, shape of the reconstructed slice. By
default, it is a square slice where the dimension
is the "x dimension" of the sinogram (number of
bins).
:param axis_position: Optional, axis position. Default is
`(shape[1]-1)/2.0`.
:param angles: Optional, a list of custom angles in radian.
:param filter_name: Optional, name of the filter for FBP. Default is
the Ram-Lak filter.
:param ctx: actual working context, left to None for automatic
initialization from device type or platformid/deviceid
:param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
:param platformid: integer with the platform_identifier, as given by
clinfo
:param deviceid: Integer with the device identifier, as given by clinfo
:param profile: switch on profiling to be able to profile at the kernel
level, store profiling elements (makes code slightly
slower)
:param extra_options: Advanced extra options in the form of a dict.
Current options are: cutoff, use_numpy_fft
"""
# OS X enforces a workgroup size of 1 when the kernel has
# synchronization barriers if sys.platform.startswith('darwin'):
# assuming no discrete GPU
# raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
platformid=platformid, deviceid=deviceid,
profile=profile)
self._init_geometry(sino_shape, slice_shape, angles, axis_position,
extra_options)
self._allocate_memory()
self._compute_angles()
self._init_kernels()
self._init_filter(filter_name)
def _init_geometry(self, sino_shape, slice_shape, angles, axis_position,
extra_options):
"""Geometry Initialization
:param sino_shape: shape of the sinogram. The sinogram is in the format
(n_b, n_a) where n_b is the number of detector bins
and n_a is the number of angles.
:param slice_shape: shape of the reconstructed slice. By
default, it is a square slice where the dimension
is the "x dimension" of the sinogram (number of
bins).
:param angles: list of projection angles in radian.
:param axis_position: axis position
:param dict extra_options: Advanced extra options
"""
self.shape = sino_shape
self.num_bins = np.int32(sino_shape[1])
self.num_projs = np.int32(sino_shape[0])
self.angles = angles
if slice_shape is None:
self.slice_shape = (self.num_bins, self.num_bins)
else:
self.slice_shape = slice_shape
self.dimrec_shape = (
_idivup(self.slice_shape[0], 32) * 32,
_idivup(self.slice_shape[1], 32) * 32
)
if axis_position:
self.axis_pos = np.float32(axis_position)
else:
self.axis_pos = np.float32((sino_shape[1] - 1.) / 2)
self.axis_array = None # TODO: add axis correction front-end
self._init_extra_options(extra_options)
def _init_extra_options(self, extra_options):
"""Backprojection extra option initialization
:param dict extra_options: Advanced extra options
"""
self.extra_options = {
"cutoff": 1.,
"use_numpy_fft": False,
# It is axis_pos - (num_bins-1)/2 in PyHST
"gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2.,
"gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2.
}
if extra_options is not None:
self.extra_options.update(extra_options)
def _allocate_memory(self):
# Host memory
self.slice = np.zeros(self.dimrec_shape, dtype=np.float32)
self.is_cpu = False
if self.device.type == "CPU":
self.is_cpu = True
# Device memory
self.buffers = [
BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE),
BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available)
BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY),
BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY),
BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY),
]
self.allocate_buffers(use_array=True)
self.d_sino = self.cl_mem["d_sino"] # shorthand
# Texture memory (if relevant)
if not(self.is_cpu):
self._allocate_textures()
# Local memory
self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes
def _compute_angles(self):
if self.angles is None:
self.angles = np.linspace(0, np.pi, self.num_projs, False)
h_cos = np.cos(self.angles).astype(np.float32)
h_sin = np.sin(self.angles).astype(np.float32)
self.cl_mem["d_cos"][:] = h_cos[:]
self.cl_mem["d_sin"][:] = h_sin[:]
if self.axis_array:
self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:]
else:
self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
def _init_kernels(self):
OpenclProcessing.compile_kernels(self, self.kernel_files)
# check that workgroup can actually be (16, 16)
self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
# Workgroup and ndrange sizes are always the same
self.wg = (16, 16)
self.ndrange = (
_idivup(int(self.dimrec_shape[1]), 32) * self.wg[0],
_idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
)
# Prepare arguments for the kernel call
if self.is_cpu:
d_sino_ref = self.d_sino.data
else:
d_sino_ref = self.d_sino_tex
self._backproj_kernel_args = (
# num of projections (int32)
self.num_projs,
# num of bins (int32)
self.num_bins,
# axis position (float32)
self.axis_pos,
# d_slice (__global float32*)
self.cl_mem["_d_slice"].data,
# d_sino (__read_only image2d_t or float*)
d_sino_ref,
# gpu_offset_x (float32)
np.float32(self.extra_options["gpu_offset_x"]),
# gpu_offset_y (float32)
np.float32(self.extra_options["gpu_offset_y"]),
# d_cos (__global float32*)
self.cl_mem["d_cos"].data,
# d_sin (__global float32*)
self.cl_mem["d_sin"].data,
# d_axis (__global float32*)
self.cl_mem["d_axes"].data,
# shared mem (__local float32*)
self._get_local_mem()
)
def _allocate_textures(self):
"""
Allocate the texture for the sinogram.
"""
self.d_sino_tex = pyopencl.Image(
self.ctx,
mf.READ_ONLY | mf.USE_HOST_PTR,
pyopencl.ImageFormat(
pyopencl.channel_order.INTENSITY,
pyopencl.channel_type.FLOAT
),
hostbuf=np.zeros(self.shape[::-1], dtype=np.float32)
)
def _init_filter(self, filter_name):
"""Filter initialization
:param str filter_name: filter name
"""
self.filter_name = filter_name or "ram-lak"
self.sino_filter = SinoFilter(
self.shape,
ctx=self.ctx,
filter_name=self.filter_name,
extra_options=self.extra_options,
)
def _get_local_mem(self):
return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
def _cpy2d_to_slice(self, dst):
ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0]))
slice_shape_ocl = np.int32(ndrange)
wg = None
kernel_args = (
dst.data,
self.cl_mem["_d_slice"].data,
np.int32(self.slice_shape[1]),
np.int32(self.dimrec_shape[1]),
np.int32((0, 0)),
np.int32((0, 0)),
slice_shape_ocl
)
return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
def _transfer_to_texture(self, sino):
if isinstance(sino, parray.Array):
return self._transfer_device_to_texture(sino)
sino2 = sino
if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
sino2 = np.ascontiguousarray(sino, dtype=np.float32)
if self.is_cpu:
ev = pyopencl.enqueue_copy(
self.queue,
self.d_sino.data,
sino2
)
what = "transfer filtered sino H->D buffer"
ev.wait()
else:
ev = pyopencl.enqueue_copy(
self.queue,
self.d_sino_tex,
sino2,
origin=(0, 0),
region=self.shape[::-1]
)
what = "transfer filtered sino H->D texture"
return EventDescription(what, ev)
def _transfer_device_to_texture(self, d_sino):
if self.is_cpu:
if id(self.d_sino) == id(d_sino):
return
ev = pyopencl.enqueue_copy(
self.queue,
self.d_sino.data,
d_sino
)
what = "transfer filtered sino D->D buffer"
ev.wait()
else:
ev = pyopencl.enqueue_copy(
self.queue,
self.d_sino_tex,
d_sino.data,
offset=0,
origin=(0, 0),
region=self.shape[::-1]
)
what = "transfer filtered sino D->D texture"
return EventDescription(what, ev)
[docs] def backprojection(self, sino, output=None):
"""Perform the backprojection on an input sinogram
:param sino: sinogram.
:param output: optional, output slice.
If provided, the result will be written in this array.
:return: backprojection of sinogram
"""
events = []
with self.sem:
events.append(self._transfer_to_texture(sino))
# Call the backprojection kernel
if self.is_cpu:
kernel_to_call = self.kernels.backproj_cpu_kernel
else:
kernel_to_call = self.kernels.backproj_kernel
kernel_to_call(
self.queue,
self.ndrange,
self.wg,
*self._backproj_kernel_args
)
# Return
if output is None:
res = self.cl_mem["_d_slice"].get()
res = res[:self.slice_shape[0], :self.slice_shape[1]]
else:
res = output
self._cpy2d_to_slice(output)
# /with self.sem
if self.profile:
self.events += events
return res
[docs] def filtered_backprojection(self, sino, output=None):
"""
Compute the filtered backprojection (FBP) on a sinogram.
:param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`)
with the shape (n_projections, n_bins)
:param output: output (`np.ndarray` or `pyopencl.array.Array`).
If nothing is provided, a new numpy array is returned.
"""
# Filter
self.sino_filter(sino, output=self.d_sino)
# Backproject
res = self.backprojection(self.d_sino, output=output)
return res
__call__ = filtered_backprojection
# -------------------
# - Compatibility -
# -------------------
@deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
[docs] def filter_projections(self, sino, rescale=True):
self.sino_filter(sino, output=self.d_sino)
def fourier_filter(sino, filter_=None, fft_size=None):
return fourier_filter_(sino, filter_=filter_, fft_size=fft_size)