Skip to content
This repository has been archived by the owner on Sep 25, 2023. It is now read-only.

Fix complex type bug with correlate #382

Open
wants to merge 1 commit into
base: branch-22.12
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions cpp/src/convolution/_convolution.cu
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,12 @@ __device__ void _cupy_correlate( const T *__restrict__ inp,
if ( mode == 0 ) { // Valid
if ( tid >= 0 && tid < inpW ) {
for ( int j = 0; j < kerW; j++ ) {
temp += inp[tid + j] * kernel[j];
if constexpr ( std::is_same_v<T, thrust::complex<float>> ||
std::is_same_v<T, thrust::complex<double>> ) {
temp += inp[tid + j] * thrust::conj( kernel[j] );
} else {
temp += inp[tid + j] * kernel[j];
}
}
}
} else if ( mode == 1 ) { // Same
Expand All @@ -171,21 +176,35 @@ __device__ void _cupy_correlate( const T *__restrict__ inp,
}
for ( int j = 0; j < kerW; j++ ) {
if ( ( start + j >= 0 ) && ( start + j < inpW ) ) {
temp += inp[start + j] * kernel[j];
if constexpr ( std::is_same_v<T, thrust::complex<float>> ||
std::is_same_v<T, thrust::complex<double>> ) {
temp += inp[start + j] * thrust::conj( kernel[j] );
} else {
temp += inp[start + j] * kernel[j];
}
}
}
} else { // Full
const int P1 { kerW - 1 };
const int start { 0 - P1 + tid };
for ( int j = 0; j < kerW; j++ ) {
if ( ( start + j >= 0 ) && ( start + j < inpW ) ) {
temp += inp[start + j] * kernel[j];
if constexpr ( std::is_same_v<T, thrust::complex<float>> ||
std::is_same_v<T, thrust::complex<double>> ) {
temp += inp[start + j] * thrust::conj( kernel[j] );
} else {
temp += inp[start + j] * kernel[j];
}
}
}
}

if ( swapped_inputs ) {
out[outW - tid - 1] = temp; // TODO: Move to shared memory
if constexpr ( std::is_same_v<T, thrust::complex<float>> || std::is_same_v<T, thrust::complex<double>> ) {
out[outW - tid - 1] = thrust::conj( temp );
} else {
out[outW - tid - 1] = temp;
}
} else {
out[tid] = temp;
}
Expand Down
95 changes: 76 additions & 19 deletions python/cusignal/test/test_convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import cupy as cp
import cusignal
import pytest
import numpy as np

from cusignal.test.utils import array_equal, _check_rapids_pytest_benchmark
from scipy import signal
Expand All @@ -23,7 +24,20 @@

class TestConvolution:
@pytest.mark.benchmark(group="Correlate")
@pytest.mark.parametrize("num_samps", [2 ** 7, 2 ** 10 + 1, 2 ** 13])
@pytest.mark.parametrize(
"dtype",
[
np.int32,
np.int64,
np.float32,
np.float64,
np.complex64,
np.complex128,
],
)
@pytest.mark.parametrize(
"num_samps", [2 ** 7, 2 ** 10 + 1, 2 ** 13]
)
@pytest.mark.parametrize("num_taps", [125, 2 ** 8, 2 ** 13])
@pytest.mark.parametrize("mode", ["full", "valid", "same"])
@pytest.mark.parametrize("method", ["direct", "fft", "auto"])
Expand All @@ -44,27 +58,29 @@ def test_correlate1d_cpu(
self,
rand_data_gen,
benchmark,
dtype,
num_samps,
num_taps,
mode,
method,
):
cpu_sig, _ = rand_data_gen(num_samps, 1)
cpu_filt, _ = rand_data_gen(num_taps, 1)
cpu_sig, _ = rand_data_gen(num_samps, 1, dtype)
cpu_filt, _ = rand_data_gen(num_taps, 1, dtype)
benchmark(self.cpu_version, cpu_sig, cpu_filt, mode, method)

def test_correlate1d_gpu(
self,
rand_data_gen,
gpubenchmark,
dtype,
num_samps,
num_taps,
mode,
method,
):

cpu_sig, gpu_sig = rand_data_gen(num_samps, 1)
cpu_filt, gpu_filt = rand_data_gen(num_taps, 1)
cpu_sig, gpu_sig = rand_data_gen(num_samps, 1, dtype)
cpu_filt, gpu_filt = rand_data_gen(num_taps, 1, dtype)
output = gpubenchmark(
self.gpu_version,
gpu_sig,
Expand All @@ -77,7 +93,20 @@ def test_correlate1d_gpu(
array_equal(output, key)

@pytest.mark.benchmark(group="Convolve")
@pytest.mark.parametrize("num_samps", [2 ** 7, 2 ** 10 + 1, 2 ** 13])
@pytest.mark.parametrize(
"dtype",
[
np.int32,
np.int64,
np.float32,
np.float64,
np.complex64,
np.complex128,
],
)
@pytest.mark.parametrize(
"num_samps", [2 ** 7, 2 ** 10 + 1, 2 ** 13]
)
@pytest.mark.parametrize("num_taps", [125, 2 ** 8, 2 ** 13])
@pytest.mark.parametrize("mode", ["full", "valid", "same"])
@pytest.mark.parametrize("method", ["direct", "fft", "auto"])
Expand All @@ -96,33 +125,35 @@ def test_convolve1d_cpu(
self,
rand_data_gen,
benchmark,
dtype,
num_samps,
num_taps,
mode,
method,
):
cpu_sig, _ = rand_data_gen(num_samps, 1)
cpu_win = signal.windows.hann(num_taps, 1)
cpu_sig, _ = rand_data_gen(num_samps, 1, dtype)
cpu_win = signal.windows.hann(num_taps, 1).astype(dtype)

benchmark(self.cpu_version, cpu_sig, cpu_win, mode, method)

def test_convolve1d_gpu(
self,
rand_data_gen,
gpubenchmark,
dtype,
num_samps,
num_taps,
mode,
method,
):

cpu_sig, gpu_sig = rand_data_gen(num_samps, 1)
gpu_win = cusignal.windows.hann(num_taps, 1)
cpu_sig, gpu_sig = rand_data_gen(num_samps, 1, dtype)
gpu_win = cusignal.windows.hann(num_taps, 1).astype(dtype)
output = gpubenchmark(
self.gpu_version, gpu_sig, gpu_win, mode, method
)

cpu_win = signal.windows.hann(num_taps, 1)
cpu_win = signal.windows.hann(num_taps, 1).astype(dtype)
key = self.cpu_version(cpu_sig, cpu_win, mode, method)
array_equal(output, key)

Expand Down Expand Up @@ -157,6 +188,17 @@ def test_fftconvolve_gpu(
array_equal(output, key)

@pytest.mark.benchmark(group="Convolve2d")
@pytest.mark.parametrize(
"dtype",
[
np.int32,
np.int64,
np.float32,
np.float64,
np.complex64,
np.complex128,
],
)
@pytest.mark.parametrize("num_samps", [2 ** 8])
@pytest.mark.parametrize("num_taps", [5, 100])
@pytest.mark.parametrize("boundary", ["fill", "wrap", "symm"])
Expand All @@ -178,27 +220,29 @@ def test_convolve2d_cpu(
self,
rand_data_gen,
benchmark,
dtype,
num_samps,
num_taps,
boundary,
mode,
):
cpu_sig, _ = rand_data_gen(num_samps, 2)
cpu_filt, _ = rand_data_gen(num_taps, 2)
cpu_sig, _ = rand_data_gen(num_samps, 2, dtype)
cpu_filt, _ = rand_data_gen(num_taps, 2, dtype)
benchmark(self.cpu_version, cpu_sig, cpu_filt, boundary, mode)

def test_convolve2d_gpu(
self,
rand_data_gen,
gpubenchmark,
dtype,
num_samps,
num_taps,
boundary,
mode,
):

cpu_sig, gpu_sig = rand_data_gen(num_samps, 2)
cpu_filt, gpu_filt = rand_data_gen(num_taps, 2)
cpu_sig, gpu_sig = rand_data_gen(num_samps, 2, dtype)
cpu_filt, gpu_filt = rand_data_gen(num_taps, 2, dtype)
output = gpubenchmark(
self.gpu_version,
gpu_sig,
Expand All @@ -211,6 +255,17 @@ def test_convolve2d_gpu(
array_equal(output, key)

@pytest.mark.benchmark(group="Correlate2d")
@pytest.mark.parametrize(
"dtype",
[
np.int32,
np.int64,
np.float32,
np.float64,
np.complex64,
np.complex128,
],
)
@pytest.mark.parametrize("num_samps", [2 ** 8])
@pytest.mark.parametrize("num_taps", [5, 100])
@pytest.mark.parametrize("boundary", ["fill", "wrap", "symm"])
Expand All @@ -232,27 +287,29 @@ def test_correlate2d_cpu(
self,
rand_data_gen,
benchmark,
dtype,
num_samps,
num_taps,
boundary,
mode,
):
cpu_sig, _ = rand_data_gen(num_samps, 2)
cpu_filt, _ = rand_data_gen(num_taps, 2)
cpu_sig, _ = rand_data_gen(num_samps, 2, dtype)
cpu_filt, _ = rand_data_gen(num_taps, 2, dtype)
benchmark(self.cpu_version, cpu_sig, cpu_filt, boundary, mode)

def test_correlate2d_gpu(
self,
rand_data_gen,
gpubenchmark,
dtype,
num_samps,
num_taps,
boundary,
mode,
):

cpu_sig, gpu_sig = rand_data_gen(num_samps, 2)
cpu_filt, gpu_filt = rand_data_gen(num_taps, 2)
cpu_sig, gpu_sig = rand_data_gen(num_samps, 2, dtype)
cpu_filt, gpu_filt = rand_data_gen(num_taps, 2, dtype)
output = gpubenchmark(
self.gpu_version,
gpu_sig,
Expand Down