In week 5 we explored the concept of pseudo-random number generation using linear congruential generators. We then implemented one in CUDA C++.
Code for week 5 can be found in this Google colab notebook
!pip install pycuda
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
from PIL import Image
from time import perf_counter
WIDTH = 600
HEIGHT = 600
image_buffer = np.zeros((HEIGHT, WIDTH, 4), dtype=np.float32)
with open("ray_trace.cu") as cuda_file:
mod = SourceModule(cuda_file.read())
ray_trace = mod.get_function("ray_trace")
block_size = (16, 16, 1)
grid_size = (
(WIDTH + block_size[0] - 1) // block_size[0],
(HEIGHT + block_size[1] - 1) // block_size[1],
1
)
t0 = perf_counter()
ray_trace(
cuda.Out(image_buffer),
np.int32(WIDTH),
np.int32(HEIGHT),
block=block_size,
grid=grid_size
)
t1 = perf_counter()
image_data = (image_buffer.clip(0, 1) * 255).astype(np.uint8)
image = Image.fromarray(image_data)
image.save('result.png')
print(f'generated image in {t1 - t0} seconds')
image
%%writefile ray_trace.cu
__device__ float3 add3(float3 u, float3 v) { return make_float3(u.x + v.x, u.y + v.y, u.z + v.z); }
__device__ float3 sub3(float3 u, float3 v) { return make_float3(u.x - v.x, u.y - v.y, u.z - v.z); }
__device__ float3 scale3(float s, float3 v) { return make_float3(s * v.x, s * v.y, s * v.z); }
__device__ float dot(float3 u, float3 v) { return u.x * v.x + u.y * v.y + u.z * v.z; }
__device__ float length(float3 v) { return sqrtf(dot(v, v)); }
__device__ float3 normalize(float3 v) { return scale3(1.0/length(v), v); }
__device__ unsigned int lcg(unsigned int &seed) {
seed = (950513337 * seed + 681462833) % 1073741824;
return seed;
}
__device__ float randf(float min, float max, unsigned int &seed) {
return ((float)lcg(seed) / 1073741824) * (max - min) + min;
}
struct Ray {
float3 origin;
float3 direction;
__device__ Ray(float3 origin, float3 direction) : origin(origin), direction(normalize(direction)) {}
__device__ float3 travel(float t) { return add3(origin, scale3(t, direction)); }
};
struct Material {
float4 color;
};
struct Sphere {
float3 center;
float radius;
Material material;
};
struct HitData {
bool hit;
float distance;
float3 position;
float3 normal;
Material material;
};
__device__ HitData intersect(Ray ray, Sphere sphere) {
float3 oc = sub3(sphere.center, ray.origin);
float tc = dot(oc, ray.direction);
if (tc < 0.0) {
return {0};
}
float d2 = dot(oc, oc) - tc * tc;
if (d2 > sphere.radius * sphere.radius) {
return {0};
}
float th = sqrtf(sphere.radius * sphere.radius - d2);
float distance = tc - th;
float3 position = ray.travel(distance);
float3 normal = normalize(sub3(position, sphere.center));
return { true, distance, position, normal, sphere.material };
}
__global__ void ray_trace(float4 *pixels, int width, int height) {
int2 pix_idx = make_int2(
blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y
);
if (pix_idx.x >= width || pix_idx.y >= height) return;
int idx = pix_idx.y * width + pix_idx.x;
unsigned int seed = 67;
Ray ray(
make_float3(0, 0, 0),
make_float3(
2.0 * pix_idx.x / width - 1,
2.0 * pix_idx.y / height - 1,
-1.0
)
);
Sphere sphere = {
make_float3(randf(-1.0, 1.0, seed), 0.0,-3.0), 2.0,
{ make_float4(1.0, 0.0, 0.0, 1.0) }
};
HitData hit = intersect(ray, sphere);
if (hit.hit) {
pixels[idx] = hit.material.color;
} else {
pixels[idx] = make_float4(0.0, 0.0, 0.0, 1.0);
}
}