Open Qmin: utilities.cu File Reference

#include "utilities.cuh"
#include "functions.h"

Include dependency graph for utilities.cu:

Classes
struct	SharedMemory< T >

struct	SharedMemory< double >

Functions
template<class T , unsigned int blockSize>
__global__ void	reduce6 (T *g_idata, T *g_odata, unsigned int n)

template<class T >
void	reduce (int size, int threads, int blocks, T *d_idata, T *d_odata)
	Provides access to the CUDA SDK reduction kernel (reduce6). More...

template<class T >
T	gpuReduction (int n, int numThreads, int numBlocks, int maxThreads, int maxBlocks, T *d_idata, T *d_odata)
	like benchmarkReduce, interfaces with reduce and returns result More...

__global__ void	gpu_serial_reduction_kernel (scalar *array, scalar *output, int helperIdx, int N)

__global__ void	gpu_serial_reduction_kernel2 (scalar *array, scalar *output, int helperIdx, int N)

__global__ void	gpu_parallel_block_reduction_kernel (scalar *input, scalar *output, int N)

__global__ void	gpu_parallel_block_reduction2_kernel (scalar *input, scalar *output, int N)

__global__ void	gpu_parallel_block_reduction3_kernel (scalar *input, scalar *output, int N)

__global__ void	gpu_vec_dot_product_kernel (dVec *input1, dVec *input2, scalar *output, int N)

__global__ void	gpu_vec_dot_product_unrolled_kernel (dVec *input1, dVec *input2, scalar *output, int N)

__global__ void	gpu_dVec_dot_products_kernel (dVec *input1, dVec *input2, scalar *output, int N)

__global__ void	gpu_unrolled_dVec_dot_products_kernel (dVec *input1, dVec *input2, scalar *output, int N)

__global__ void	gpu_scalar_times_dVec_squared_kernel (dVec *d_vec1, scalar *d_scalars, scalar factor, scalar *d_ans, int n)

__global__ void	gpu_dot_dVec_vectors_kernel (dVec *d_vec1, dVec *d_vec2, scalar *d_ans, int n)

__global__ void	gpu_dVec_times_scalar_kernel (dVec *d_vec1, scalar factor, int n)

__global__ void	gpu_dVec_times_scalar_kernel (dVec *d_vec1, scalar factor, dVec *d_ans, int n)

__global__ void	gpu_dVec_plusEqual_dVec_kernel (dVec *d_vec1, dVec *d_vec2, scalar factor, int n)

bool	gpu_dVec_plusEqual_dVec (dVec *d_vec1, dVec *d_vec2, scalar factor, int N, int maxBlockSize=512)
	vec1 += a*vec2 More...

bool	gpu_dVec_times_scalar (dVec *d_vec1, scalar factor, int N)
	(dVec) input *= factor More...

bool	gpu_dVec_times_scalar (dVec *d_vec1, scalar factor, dVec *d_ans, int N)
	(dVec) ans = input * factor More...

bool	gpu_scalar_times_dVec_squared (dVec *d_vec1, scalar *d_scalars, scalar factor, scalar *d_answer, int N)
	ans = a*b[i]*c[i]^2 More...

bool	gpu_dot_dVec_vectors (dVec *d_vec1, dVec *d_vec2, scalar *d_ans, int N)
	(scalar) ans = (dVec) vec1 . vec2 More...

scalar	gpu_gpuarray_dVec_dot_products (GPUArray< dVec > &input1, GPUArray< dVec > &input2, GPUArray< scalar > &intermediate, GPUArray< scalar > &intermediate2, int N=0, int maxBlockSize=512)
	A function of convenience: take the gpuarrays themselves and dot the data. More...

bool	gpu_dVec_dot_products (dVec *input1, dVec *input2, scalar *intermediate, scalar *intermediate2, scalar *output, int helperIdx, int N, int block_size)
	Take two vectors of dVecs and compute the sum of the dot products between them. More...

bool	gpu_dVec_dot_products (dVec *input1, dVec *input2, scalar *output, int helperIdx, int N)
	Take two vectors of dVecs and compute the sum of the dot products between them using thrust. More...

bool	gpu_parallel_reduction (scalar *input, scalar *intermediate, scalar *output, int helperIdx, int N, int block_size)
	A straightforward two-step parallel reduction algorithm with block_size declared. More...

bool	gpu_serial_reduction (scalar *array, scalar *output, int helperIdx, int N)
	A trivial reduction of an array by one thread in serial. Think before you use this. More...

template<typename T >
__global__ void	gpu_set_array_kernel (T *arr, T value, int N)

template<typename T >
bool	gpu_set_array (T *arr, T value, int N, int maxBlockSize=512)
	set every element of an array to the specified value More...

template<typename T >
__global__ void	gpu_copy_gpuarray_kernel (T *copyInto, T *copyFrom, int N)

template<typename T >
bool	gpu_copy_gpuarray (GPUArray< T > &copyInto, GPUArray< T > &copyFrom, int block_size=512)
	copy data into target on the device More...

scalar	host_dVec_dot_products (dVec *input1, dVec *input2, int N)
	Take two vectors of dVecs and compute the sum of the dot products between them on the host. More...

void	host_dVec_plusEqual_dVec (dVec *d_vec1, dVec *d_vec2, scalar factor, int N)
	vec1 += a*vec2... on the host! More...

void	host_dVec_times_scalar (dVec *d_vec1, scalar factor, dVec *d_ans, int N)
	(dVec) ans = input * factor... on the host More...

template scalar	gpuReduction< scalar > (int n, int numThreads, int numBlocks, int maxThreads, int maxBlocks, scalar *d_idata, scalar *d_odata)

template int	gpuReduction< int > (int n, int numThreads, int numBlocks, int maxThreads, int maxBlocks, int *d_idata, int *d_odata)

template void	reduce< int > (int size, int threads, int blocks, int *d_idata, int *d_odata)

template void	reduce< scalar > (int size, int threads, int blocks, scalar *d_idata, scalar *d_odata)

template bool	gpu_copy_gpuarray< dVec > (GPUArray< dVec > &copyInto, GPUArray< dVec > &copyFrom, int maxBlockSize)

template bool	gpu_copy_gpuarray< scalar > (GPUArray< scalar > &copyInto, GPUArray< scalar > &copyFrom, int maxBlockSize)

template bool	gpu_set_array< int > (int *, int, int, int)

template bool	gpu_set_array< unsigned int > (unsigned int *, unsigned int, int, int)

template bool	gpu_set_array< int2 > (int2 *, int2, int, int)

template bool	gpu_set_array< scalar > (scalar *, scalar, int, int)

template bool	gpu_set_array< dVec > (dVec *, dVec, int, int)

template bool	gpu_set_array< cubicLatticeDerivativeVector > (cubicLatticeDerivativeVector *, cubicLatticeDerivativeVector, int, int)

Detailed Description

Defines kernel callers and kernels for some simple GPU array calculations.

Classes

Functions

Detailed Description