The code for week 2 can be found in this Google Colab notebook; you can open your own (click "new notebook"). Make sure that you are using the "T4 GPU" runtime when running the cells (you will find it under runtime --> change runtime type).
!pip install pycuda
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
import numpy as np
from PIL import Image
from time import perf_counter
WIDTH = 600
HEIGHT = 600
image_buffer = np.zeros((HEIGHT, WIDTH, 4), dtype=np.float32)
with open("ray_trace.cu") as cuda_file:
mod = SourceModule(cuda_file.read())
ray_trace = mod.get_function("ray_trace")
block_size = (16, 16, 1)
grid_size = (
(WIDTH + block_size[0] - 1) // block_size[0],
(HEIGHT + block_size[1] - 1) // block_size[1],
)
t0 = perf_counter()
ray_trace(
cuda.Out(image_buffer),
np.int32(WIDTH),
np.int32(HEIGHT),
block=block_size,
grid=grid_size
)
t1 = perf_counter()
image_data = (image_buffer.clip(0, 1) * 255).astype(np.uint8)
image = Image.fromarray(np.flip(image_data, 0))
image.save('result.png')
print(f'generated image in {t1 - t0} seconds')
image
%%writefile ray_trace.cu
__device__ float3 operator+(float3 a, float3 b) { return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); }
__device__ float3 operator-(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ float3 operator*(float s, float3 v) { return make_float3(s * v.x, s * v.y, s * v.z); }
__device__ float dot(float3 a, float3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
__device__ float length2(float3 v) { return v.x * v.x + v.y * v.y + v.z * v.z; }
__device__ float length(float3 v) { return sqrtf(length2(v)); }
__device__ float3 normalize(float3 v) { return (1.0f / length(v)) * v; }
struct Ray {
float3 origin;
float3 direction;
__device__ Ray(float3 origin, float3 direction) : origin(origin), direction(normalize(direction)) {}
};
struct Sphere {
float3 center;
float radius;
};
struct HitData {
bool hit;
float distance;
float3 position;
float3 normal;
};
__device__ HitData intersect(Ray ray, Sphere sphere) {
float3 oc = sphere.center - ray.origin;
float tc = dot(oc, ray.direction);
if (tc < 0.0f) {
return {0};
}
float d2 = length2(oc) - tc * tc;
if (d2 > sphere.radius * sphere.radius) {
return {0};
}
float th = sqrtf(sphere.radius * sphere.radius - d2);
float distance = tc - th;
float3 position = ray.origin + distance * ray.direction;
float3 normal = normalize(position - sphere.center);
return { true, distance, position, normal };
}
__global__ void ray_trace(float4 *pixels, int width, int height) {
int2 pix_idx = make_int2(
blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y
);
if (pix_idx.x >= width || pix_idx.y >= height) return;
int idx = pix_idx.y * width + pix_idx.x;
/*
pixels[idx] = make_float4(
(float)pix_idx.x / width,
(float)pix_idx.y / height,
0.0, 1.0
);
*/
float aspect_ratio = (float) width / height;
Ray camera_ray {
make_float3(0.0f, 0.0f, 0.0f),
make_float3(
((float)pix_idx.x / width * 2.0f - 1.0f) * aspect_ratio,
(float)pix_idx.y / height * 2.0f - 1.0f,
-1.0f
)
};
Sphere sphere = { make_float3(0.0f, 0.0f, -5.0f), 1.0f };
HitData hit = intersect(camera_ray, sphere);
if (hit.hit) {
pixels[idx] = make_float4(hit.normal.x, hit.normal.y, hit.normal.z, 1.0);
} else {
pixels[idx] = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
}
}