In week 3, we discussed the geometry of ray/sphere intersections and implemented, in CUDA C++, many of the primitive operations and data structures that the sphere intersection logic will use.
The code for week 3 can be found in this Google Colab notebook; feel free to make a copy. Make sure that you are using the "T4 GPU" runtime when running the cells (you will find it under Runtime → Change runtime type).
!pip install pycuda
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
from PIL import Image
from time import perf_counter
# Host-side driver: compiles ray_trace.cu, launches the `ray_trace` kernel over
# a WIDTH x HEIGHT image, and saves/displays the result.
# The file ray_trace.cu must already exist (it is written by the %%writefile cell).

# Output image dimensions in pixels.
WIDTH = 600
HEIGHT = 600

# Four float32 components per pixel, rows first; this matches the kernel's
# `float4 *pixels` parameter (one float4 per pixel, indexed as y * width + x).
image_buffer = np.zeros((HEIGHT, WIDTH, 4), dtype=np.float32)

# Compile the CUDA source and look up the kernel entry point by name.
with open("ray_trace.cu") as cuda_file:
    mod = SourceModule(cuda_file.read())
ray_trace = mod.get_function("ray_trace")

# 16 x 16 threads per block; the grid is rounded up (ceiling division) so the
# whole image is covered even when WIDTH/HEIGHT are not multiples of 16.
block_size = (16, 16, 1)
grid_size = (
    (WIDTH + block_size[0] - 1) // block_size[0],
    (HEIGHT + block_size[1] - 1) // block_size[1],
    1,
)

# NOTE(review): with cuda.Out the call is expected to also allocate the device
# buffer and copy the result back into image_buffer, so the measured interval
# covers more than pure kernel time -- confirm against the PyCUDA docs.
t0 = perf_counter()
ray_trace(
    cuda.Out(image_buffer),
    np.int32(WIDTH),
    np.int32(HEIGHT),
    block=block_size,
    grid=grid_size,
)
t1 = perf_counter()

# Clamp to [0, 1], scale to 0..255 and truncate to 8-bit channels for PIL.
image_data = (image_buffer.clip(0, 1) * 255).astype(np.uint8)
image = Image.fromarray(image_data)
image.save('result.png')
print(f'generated image in {t1 - t0} seconds')

# Trailing expression so the notebook renders the image inline.
image
%%writefile ray_trace.cu
// Component-wise sum u + v.
__device__ float3 add3(float3 u, float3 v) {
    return make_float3(u.x + v.x, u.y + v.y, u.z + v.z);
}

// Component-wise difference u - v.
__device__ float3 sub3(float3 u, float3 v) {
    return make_float3(u.x - v.x, u.y - v.y, u.z - v.z);
}

// Scalar multiple s * v.
__device__ float3 scale3(float s, float3 v) {
    return make_float3(s * v.x, s * v.y, s * v.z);
}

// Dot product of u and v.
__device__ float dot(float3 u, float3 v) {
    return u.x * v.x + u.y * v.y + u.z * v.z;
}

// Euclidean length of v, computed as sqrt(v . v).
__device__ float length(float3 v) {
    return sqrtf(dot(v, v));
}
// Returns v scaled by the reciprocal of its length.
// The reciprocal uses the float literal 1.0f so the division stays in single
// precision; a bare 1.0 is a double and promotes the division to double.
// A zero-length v is not special-cased: the reciprocal is then infinite and
// the returned components are not finite.
__device__ float3 normalize(float3 v) {
    return scale3(1.0f / length(v), v);
}
// A ray described by a start point and a direction.
struct Ray {
    float3 origin;     // start point of the ray
    float3 direction;  // stored normalized by the constructor

    // Builds a ray from a start point `o` and a direction `d`;
    // `d` is passed through normalize() before being stored.
    __device__ Ray(float3 o, float3 d)
        : origin(o),
          direction(normalize(d)) {}

    // Point reached from the origin after moving `t` along the direction:
    // origin + t * direction.
    __device__ float3 travel(float t) {
        float3 offset = scale3(t, direction);
        return add3(origin, offset);
    }
};
// Surface appearance attached to a Sphere and carried in HitData.
struct Material {
// Four-component color. NOTE(review): presumably RGBA in the same order as the
// kernel's output pixels -- confirm once shading code uses it.
float4 color;
};
// A sphere primitive: the shape taken by intersect() as its second argument.
struct Sphere {
// Center point of the sphere.
float3 center;
// Radius of the sphere (same units as `center`; presumably positive -- TODO confirm).
float radius;
// Material associated with this sphere.
Material material;
};
// Result record returned by intersect(Ray, Sphere).
// NOTE(review): field meanings below are inferred from their names and types;
// confirm against the intersect() implementation.
struct HitData {
// Whether an intersection was found.
bool hit;
// Presumably the distance along the ray to the hit point.
float distance;
// Presumably the hit point.
float3 position;
// Presumably the surface normal at the hit point.
float3 normal;
// Presumably the material of the object that was hit.
Material material;
};
// Intersects `ray` with `sphere` and reports the nearest intersection at a
// non-negative distance along the ray.
//
// The previous body was empty, so the function ended without returning a
// value; using that result is undefined behaviour.
//
// Assumes ray.direction has unit length (the Ray constructor normalizes it).
// With a unit direction, |origin + t * direction - center|^2 = radius^2
// reduces to t^2 + 2*b*t + c = 0 with
//   b = (origin - center) . direction
//   c = |origin - center|^2 - radius^2
// whose roots are t = -b -/+ sqrt(b*b - c).
//
// Returns a HitData with hit == false and every other field zeroed when the
// sphere is missed or lies entirely behind the origin. Otherwise hit == true
// and:
//   distance = t of the chosen root,
//   position = ray.travel(t),
//   normal   = unit vector from the sphere center to `position`,
//   material = sphere.material.
// When the origin is inside the sphere the far root (exit point) is reported.
// No epsilon offset is applied, so t == 0 counts as a hit.
__device__ HitData intersect(Ray ray, Sphere sphere) {
    // Zero-initialize so a miss never exposes indeterminate fields.
    HitData result = {};
    result.hit = false;

    const float3 oc = sub3(ray.origin, sphere.center);
    const float b = dot(oc, ray.direction);
    const float c = dot(oc, oc) - sphere.radius * sphere.radius;
    const float discriminant = b * b - c;

    // Negated comparison so that a NaN discriminant is also treated as a miss.
    if (!(discriminant >= 0.0f)) return result;

    const float root = sqrtf(discriminant);
    float t = -b - root;                 // nearer of the two intersections
    if (t < 0.0f) t = -b + root;         // nearer one is behind: try the farther
    if (!(t >= 0.0f)) return result;     // both behind the origin (or NaN)

    result.hit = true;
    result.distance = t;
    result.position = ray.travel(t);
    result.normal = normalize(sub3(result.position, sphere.center));
    result.material = sphere.material;
    return result;
}
// Kernel: writes one float4 per pixel into `pixels`.
//
// Launch layout: a 2D grid of 2D blocks with one thread per pixel. Any block
// size works as long as the grid covers at least width x height threads;
// threads that fall outside the image return without writing.
//
// pixels : buffer of at least width * height float4 values, indexed as
//          y * width + x.
// width  : image width in pixels.
// height : image height in pixels.
//
// Current output is a placeholder gradient: the first component ramps with x,
// the second with y, the third is 0 and the fourth is 1.
__global__ void ray_trace(float4 *pixels, int width, int height) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up on the host, so edge blocks overhang the image.
    if (x >= width || y >= height) return;

    // Widen before multiplying so y * width cannot overflow int on very large
    // images.
    const size_t idx = (size_t)y * (size_t)width + (size_t)x;

    // Float-suffixed literals keep the constants single precision.
    pixels[idx] = make_float4(
        (float)x / width,
        (float)y / height,
        0.0f,
        1.0f
    );
}