In [1]:
import numpy as np
import cupy as cp
from cupyx.profiler import benchmark

In [2]:
def my_func(a, b):
    """Evaluate ``pi * sin(-a) * exp(b)`` with CuPy's elementwise functions.

    Args:
        a: Operand passed (negated) to ``cp.sin``.
        b: Operand passed to ``cp.exp``.

    Returns:
        The product ``cp.pi * cp.sin(-a) * cp.exp(b)``.
    """
    # Same left-to-right evaluation as the one-line form: (pi * sin(-a)) * exp(b).
    scaled_sine = cp.pi * cp.sin(-a)
    return scaled_sine * cp.exp(b)

In [3]:
# Two independent random operands of the same shape, drawn in order (a, then b).
a, b = (cp.random.random((2560, 1024)) for _ in range(2))

# cupyx's benchmark helper reports CPU and GPU time per call separately.
print(
    benchmark(
        my_func,
        args=(a, b),
        n_repeat=100_000,
    )
)

my_func             :    CPU:    36.410 us   +/-  5.270 (min:    34.873 / max:  1140.029) us     GPU-0:    94.612 us   +/-  4.973 (min:    91.424 / max:  1172.864) us


In [4]:
# Fixed seed so the benchmark inputs are identical on every Restart & Run All;
# the value itself is arbitrary.
np_rng = np.random.default_rng(seed=0)

# Host-side (NumPy) dense system A x = b: a 1000x1000 matrix and a length-1000
# right-hand side, both filled by np_rng.random.
A_cpu = np_rng.random((1000, 1000))
b_cpu = np_rng.random(1000)

# CuPy copies of the same data, so the CPU and GPU solves below operate on
# identical inputs.
A_gpu = cp.array(A_cpu)
b_gpu = cp.array(b_cpu)

In [5]:
%%timeit
# CPU baseline: time NumPy's dense solver on the host copies A_cpu / b_cpu.
np.linalg.solve(A_cpu, b_cpu)

17.6 ms ± 62.1 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
%%timeit
cp.linalg.solve(A_gpu, b_gpu)

2.49 ms ± 268 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Allocate an array of ones with CuPy.
x_gpu = cp.ones(shape=(1000, 1000))

# .get() returns the array's contents as a NumPy array; the print below shows the type.
x_cpu = x_gpu.get()

print(f"Type of CPU array is: {type(x_cpu)}")

Type of CPU array is: <class 'numpy.ndarray'>
