In week 2, we set up much of the framework needed for ray tracing and generated a very basic image in which the color of each pixel is computed by its own GPU thread.
The code for week 2 can be found in this Google Colab notebook; you can also create your own notebook (click "New notebook"). Make sure that you are using the "T4 GPU" runtime when running the cells (you will find it under Runtime → Change runtime type).
!pip install pycuda
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
import numpy as np
from PIL import Image
from time import perf_counter
# Output resolution in pixels.
WIDTH = 600
HEIGHT = 600

# Host-side destination for the kernel output: four float32 values per pixel,
# laid out as (row, column, channel).
image_buffer = np.zeros((HEIGHT, WIDTH, 4), dtype=np.float32)

# Compile the kernel source. ray_trace.cu must already exist on disk, so the
# %%writefile cell has to be run before this one.
with open("ray_trace.cu") as cuda_file:
    kernel_source = cuda_file.read()
mod = SourceModule(kernel_source)
ray_trace = mod.get_function("ray_trace")

# Launch geometry: 16x16 threads per block, and enough blocks to cover the
# whole image. -(-a // b) is ceiling division, so a partial block is added
# whenever a dimension is not a multiple of the block size.
block_size = (16, 16, 1)
grid_size = (
    -(-WIDTH // block_size[0]),
    -(-HEIGHT // block_size[1]),
    1,
)

# cuda.Out asks pycuda to copy the device result back into image_buffer, so
# the timed interval covers the kernel call together with that transfer.
t0 = perf_counter()
ray_trace(
    cuda.Out(image_buffer),
    np.int32(WIDTH),
    np.int32(HEIGHT),
    block=block_size,
    grid=grid_size,
)
t1 = perf_counter()

# Clamp to [0, 1], scale to 8-bit, and write the picture to disk.
image_data = (np.clip(image_buffer, 0, 1) * 255).astype(np.uint8)
image = Image.fromarray(image_data)
image.save('result.png')
print(f'generated image in {t1 - t0} seconds')

# Leaving the image as the cell's last expression makes the notebook show it.
image
%%writefile ray_trace.cu
// Fills `pixels` with a simple gradient, one thread per pixel.
//
// pixels : output buffer, indexed as y * width + x
// width  : image width in pixels
// height : image height in pixels
//
// Each in-bounds thread writes (x / width, y / height, 0, 1) for its pixel.
__global__ void ray_trace(float4 *pixels, int width, int height) {
    // Pixel coordinates of this thread within the whole launch grid.
    const int px = blockIdx.x * blockDim.x + threadIdx.x;
    const int py = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid may overshoot the image edges; only in-bounds threads write.
    if (px < width && py < height) {
        const float horizontal = (float)px / width;
        const float vertical = (float)py / height;
        pixels[py * width + px] = make_float4(horizontal, vertical, 0.0f, 1.0f);
    }
}