#!/usr/bin/env python
"""
PyCUDA-based FFT functions.
"""
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import pycuda.elementwise as el
import pycuda.tools as tools
import numpy as np
from . import cufft
from .cufft import CUFFT_COMPATIBILITY_NATIVE, \
CUFFT_COMPATIBILITY_FFTW_PADDING, \
CUFFT_COMPATIBILITY_FFTW_ASYMMETRIC, \
CUFFT_COMPATIBILITY_FFTW_ALL
from . import misc
class Plan:
    """
    CUFFT plan class.

    This class represents an FFT plan for CUFFT. The underlying CUFFT
    handle is created by the constructor and released when the object
    is garbage-collected.

    Parameters
    ----------
    shape : int or tuple of ints
        Transform shape. A scalar is treated as the length of a
        one-dimensional transform. The number of entries is passed to
        CUFFT as the rank of the transform.
    in_dtype : { numpy.float32, numpy.float64, numpy.complex64, numpy.complex128 }
        Type of input data.
    out_dtype : { numpy.float32, numpy.float64, numpy.complex64, numpy.complex128 }
        Type of output data.
    batch : int
        Number of FFTs to configure in parallel (default is 1).
    stream : pycuda.driver.Stream
        Stream with which to associate the plan. If no stream is specified,
        the default stream is used.
    mode : int
        FFTW compatibility mode.

    Raises
    ------
    ValueError
        If `batch` is not positive, if the combination of `in_dtype` and
        `out_dtype` is not one of the supported transform types, or if
        `shape` is empty.
    RuntimeError
        If a double precision transform is requested on a device whose
        compute capability is below 1.3.
    """

    def __init__(self, shape, in_dtype, out_dtype, batch=1, stream=None,
                 mode=0x01):
        # Define the handle before anything can fail so that __del__
        # can tell a fully constructed plan from a partial one:
        self.handle = None

        if np.isscalar(shape):
            self.shape = (shape, )
        else:
            self.shape = shape

        self.in_dtype = in_dtype
        self.out_dtype = out_dtype

        if batch <= 0:
            raise ValueError('batch size must be greater than 0')
        self.batch = batch

        # Determine type of transformation:
        if in_dtype == np.float32 and out_dtype == np.complex64:
            self.fft_type = cufft.CUFFT_R2C
            self.fft_func = cufft.cufftExecR2C
        elif in_dtype == np.complex64 and out_dtype == np.float32:
            self.fft_type = cufft.CUFFT_C2R
            self.fft_func = cufft.cufftExecC2R
        elif in_dtype == np.complex64 and out_dtype == np.complex64:
            self.fft_type = cufft.CUFFT_C2C
            self.fft_func = cufft.cufftExecC2C
        elif in_dtype == np.float64 and out_dtype == np.complex128:
            self.fft_type = cufft.CUFFT_D2Z
            self.fft_func = cufft.cufftExecD2Z
        elif in_dtype == np.complex128 and out_dtype == np.float64:
            self.fft_type = cufft.CUFFT_Z2D
            self.fft_func = cufft.cufftExecZ2D
        elif in_dtype == np.complex128 and out_dtype == np.complex128:
            self.fft_type = cufft.CUFFT_Z2Z
            self.fft_func = cufft.cufftExecZ2Z
        else:
            raise ValueError('unsupported input/output type combination')

        # Check for double precision support:
        capability = misc.get_compute_capability(misc.get_current_device())
        if capability < 1.3 and \
           (misc.isdoubletype(in_dtype) or misc.isdoubletype(out_dtype)):
            raise RuntimeError('double precision requires compute capability '
                               '>= 1.3 (you have %g)' % capability)

        # Set up plan:
        if len(self.shape) > 0:
            # The dimensions are handed to CUFFT as a pointer to a
            # 32-bit integer array:
            n = np.asarray(self.shape, np.int32)
            self.handle = cufft.cufftPlanMany(len(self.shape), n.ctypes.data,
                                              None, 1, 0, None, 1, 0,
                                              self.fft_type, self.batch)
        else:
            raise ValueError('invalid transform size')

        # Set FFTW compatibility mode:
        cufft.cufftSetCompatibilityMode(self.handle, mode)

        # Associate stream with plan:
        if stream is not None:
            cufft.cufftSetStream(self.handle, stream.handle)

    def __del__(self):
        # Nothing to release if construction failed before the plan
        # was created:
        handle = getattr(self, 'handle', None)
        if handle is None:
            return

        # Don't complain if handle destruction fails because the plan
        # may have already been cleaned up:
        try:
            cufft.cufftDestroy(handle)
        except Exception:
            pass
def _scale_inplace(a, x_gpu):
    """
    Scale an array by a specified value in-place.

    Every element of `x_gpu` is divided by `a`.

    Parameters
    ----------
    a : int or float
        Divisor. It is converted to the element type of `x_gpu` before
        being passed to the kernel.
    x_gpu : pycuda.gpuarray.GPUArray
        Array to modify in place.
    """
    dtype = x_gpu.dtype

    # The divisor is a runtime argument of the kernel, so the generated
    # code depends on the array type only. Cache one kernel per type to
    # avoid invoking the compiler again for types already encountered:
    try:
        func = _scale_inplace.cache[dtype]
    except KeyError:
        ctype = tools.dtype_to_ctype(dtype)
        func = el.ElementwiseKernel(
            "{ctype} a, {ctype} *x".format(ctype=ctype),
            "x[i] /= a")
        _scale_inplace.cache[dtype] = func
    func(dtype.type(a), x_gpu)


# Compiled scaling kernels, keyed by array dtype:
_scale_inplace.cache = {}
def _fft(x_gpu, y_gpu, plan, direction, scale=None):
    """
    Fast Fourier Transform.

    Parameters
    ----------
    x_gpu : pycuda.gpuarray.GPUArray
        Input array.
    y_gpu : pycuda.gpuarray.GPUArray
        Output array.
    plan : Plan
        FFT plan.
    direction : { cufft.CUFFT_FORWARD, cufft.CUFFT_INVERSE }
        Transform direction. It is validated against the plan's data
        types and is passed on to CUFFT only for complex-to-complex
        transforms.

    Optional Parameters
    -------------------
    scale : int or float
        Scale the values in the output array by dividing them by this value.

    Raises
    ------
    ValueError
        If an in-place transform is requested for a plan that is not
        complex-to-complex, or if `direction` is inconsistent with the
        plan's input and output types.

    Notes
    -----
    This function should not be called directly.
    """
    complex_to_complex = plan.fft_type in (cufft.CUFFT_C2C, cufft.CUFFT_Z2Z)

    if x_gpu.gpudata == y_gpu.gpudata and not complex_to_complex:
        raise ValueError('can only compute in-place transform of complex data')

    # Classify the plan's types with issubdtype rather than the
    # np.sctypes table, which is not available in NumPy >= 2.0:
    in_complex = np.issubdtype(plan.in_dtype, np.complexfloating)
    in_real = np.issubdtype(plan.in_dtype, np.floating)
    out_complex = np.issubdtype(plan.out_dtype, np.complexfloating)
    out_real = np.issubdtype(plan.out_dtype, np.floating)

    if direction == cufft.CUFFT_FORWARD and in_complex and out_real:
        raise ValueError('cannot compute forward complex -> real transform')

    if direction == cufft.CUFFT_INVERSE and in_real and out_complex:
        raise ValueError('cannot compute inverse real -> complex transform')

    # Only the complex-to-complex execution functions take a direction:
    if complex_to_complex:
        plan.fft_func(plan.handle, int(x_gpu.gpudata), int(y_gpu.gpudata),
                      direction)
    else:
        plan.fft_func(plan.handle, int(x_gpu.gpudata),
                      int(y_gpu.gpudata))

    # Scale the result by dividing it by the requested value:
    if scale is not None:
        _scale_inplace(scale, y_gpu)
def fft(x_gpu, y_gpu, plan, scale=False):
    """
    Fast Fourier Transform.

    Compute the FFT of some data in device memory using the
    specified plan. The result is written into `y_gpu`.

    Parameters
    ----------
    x_gpu : pycuda.gpuarray.GPUArray
        Input array.
    y_gpu : pycuda.gpuarray.GPUArray
        Array that receives the FFT of the input array.
    plan : Plan
        FFT plan.
    scale : bool, optional
        If True, divide the computed FFT by the number of elements in
        the input array per batch entry (``x_gpu.size/plan.batch``).

    Examples
    --------
    >>> import pycuda.autoinit
    >>> import pycuda.gpuarray as gpuarray
    >>> import numpy as np
    >>> N = 128
    >>> x = np.asarray(np.random.rand(N), np.float32)
    >>> xf = np.fft.fft(x)
    >>> x_gpu = gpuarray.to_gpu(x)
    >>> xf_gpu = gpuarray.empty(N//2+1, np.complex64)
    >>> plan = Plan(x.shape, np.float32, np.complex64)
    >>> fft(x_gpu, xf_gpu, plan)
    >>> np.allclose(xf[0:N//2+1], xf_gpu.get(), atol=1e-6)
    True

    Notes
    -----
    The transform is stored in `y_gpu`; this function does not return
    the output array.

    For real to complex transformations, this function computes
    N/2+1 non-redundant coefficients of a length-N input signal.
    """
    if scale:
        return _fft(x_gpu, y_gpu, plan, cufft.CUFFT_FORWARD,
                    x_gpu.size/plan.batch)
    return _fft(x_gpu, y_gpu, plan, cufft.CUFFT_FORWARD)
def ifft(x_gpu, y_gpu, plan, scale=False):
    """
    Inverse Fast Fourier Transform.

    Compute the inverse FFT of some data in device memory using the
    specified plan. The result is written into `y_gpu`.

    Parameters
    ----------
    x_gpu : pycuda.gpuarray.GPUArray
        Input array.
    y_gpu : pycuda.gpuarray.GPUArray
        Array that receives the inverse FFT of the input array.
    plan : Plan
        FFT plan.
    scale : bool, optional
        If True, divide the computed inverse FFT by the number of
        elements in the output array per batch entry
        (``y_gpu.size/plan.batch``).

    Examples
    --------
    >>> import pycuda.autoinit
    >>> import pycuda.gpuarray as gpuarray
    >>> import numpy as np
    >>> N = 128
    >>> x = np.asarray(np.random.rand(N), np.float32)
    >>> xf = np.asarray(np.fft.fft(x), np.complex64)
    >>> xf_gpu = gpuarray.to_gpu(xf[0:N//2+1])
    >>> x_gpu = gpuarray.empty(N, np.float32)
    >>> plan = Plan(N, np.complex64, np.float32)
    >>> ifft(xf_gpu, x_gpu, plan, True)
    >>> np.allclose(x, x_gpu.get(), atol=1e-6)
    True

    Notes
    -----
    The transform is stored in `y_gpu`; this function does not return
    the output array.

    For complex to real transformations, this function assumes the
    input contains N/2+1 non-redundant FFT coefficients of a signal of
    length N.
    """
    if scale:
        return _fft(x_gpu, y_gpu, plan, cufft.CUFFT_INVERSE,
                    y_gpu.size/plan.batch)
    return _fft(x_gpu, y_gpu, plan, cufft.CUFFT_INVERSE)
if __name__ == "__main__":
    # When executed as a script, run the examples embedded in the
    # docstrings of this module as a self-test.
    from doctest import testmod
    testmod()