Initial

2023-02-06 08:42:56 +00:00 · 2023-02-06 08:42:56 +00:00 · 27a47446b6
commit 27a47446b6
11 changed files with 351 additions and 0 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,3 @@
+[submodule "glm"]
+	path = glm
+	url = https://github.com/g-truc/glm
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,19 @@
+# Build script for the "cudagravity" executable (CUDA + SDL2 + glm).
+cmake_minimum_required(VERSION 3.10)
+project(verlet CUDA CXX)
+
+set(CMAKE_CUDA_STANDARD 17)
+
+# Target compute capability 7.5.
+# NOTE(review): CMAKE_CUDA_ARCHITECTURES is only understood by CMake >= 3.18;
+# older versions silently ignore it.
+set(CMAKE_CUDA_ARCHITECTURES 75)
+
+find_package(SDL2 REQUIRED)
+
+add_subdirectory(glm)
+
+# Append -G (device-side debug information) to the existing flags.
+# The value has to be ONE quoted string: passing the old value and "-G" as two
+# arguments makes CMake store a list ("<old>;-G"), and the ';' then ends up on
+# the nvcc command line whenever CMAKE_CUDA_FLAGS was not empty.
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")
+
+add_executable(cudagravity main.cu particle.cu particle.cuh definitions.cuh cudaMemory.cuh sdlHelper.cu sdlHelper.cuh)
+
+target_link_libraries(cudagravity PUBLIC glm::glm SDL2::SDL2)
+
+# Compile device code as relocatable device code (-rdc=true).
+set_target_properties(cudagravity PROPERTIES
+        CUDA_SEPARABLE_COMPILATION ON)
--- a/cudaMemory.cuh
+++ b/cudaMemory.cuh
@ -0,0 +1,62 @@
+//
+// Created by kai on 02/02/23.
+//
+
+#pragma once
+
+#ifndef VERLET_CUDAMEMORY_CUH
+#define VERLET_CUDAMEMORY_CUH
+
+#include "definitions.cuh"
+#include <cstdio>
+
+
+/**
+ * Owns two equally sized buffers holding N elements of T: a host buffer and a
+ * buffer obtained from the CUDA runtime. send() copies host -> device and
+ * sync() copies device -> host; operator[] and getPointer() expose the host
+ * buffer, getDevicePointer() the CUDA one.
+ *
+ * The host buffer starts out fully zeroed. T is handled as raw bytes (calloc /
+ * cudaMemcpy), so no constructors or destructors of T are ever run.
+ *
+ * Any failing CUDA call or host allocation terminates the process.
+ */
+template<typename T>
+class CudaMemory {
+public:
+    /**
+     * Allocates `array_length` elements on both sides (device side via cudaMalloc).
+     */
+    explicit CudaMemory(size_t array_length) : hostPtr(nullptr), devPtr(nullptr), N(array_length) {
+        CUDA_CALL(cudaMalloc(&this->devPtr, sizeof(T) * array_length));
+        this->hostPtr = allocateZeroedHost(array_length);
+    }
+
+    /**
+     * Allocates a single element. Unlike the sized constructor, the CUDA-side
+     * element comes from cudaMallocManaged.
+     */
+    CudaMemory() : hostPtr(nullptr), devPtr(nullptr), N(1) {
+        CUDA_CALL(cudaMallocManaged(&this->devPtr, sizeof(T)));
+        this->hostPtr = allocateZeroedHost(1);
+    }
+
+    // The object owns raw pointers: a member-wise copy would free both buffers
+    // twice, so copying is disabled and ownership can only be moved.
+    CudaMemory(const CudaMemory&) = delete;
+    CudaMemory& operator=(const CudaMemory&) = delete;
+
+    /** Takes over the buffers of `other`, leaving it empty (null pointers, N == 0). */
+    CudaMemory(CudaMemory&& other) noexcept
+            : hostPtr(other.hostPtr), devPtr(other.devPtr), N(other.N) {
+        other.hostPtr = nullptr;
+        other.devPtr = nullptr;
+        other.N = 0;
+    }
+
+    /** Releases both buffers; null pointers (moved-from objects) are accepted by both calls. */
+    ~CudaMemory() {
+        CUDA_CALL(cudaFree(this->devPtr));
+        free(this->hostPtr);
+    }
+
+    /** Returns the host buffer. */
+    T* getPointer() {
+        return this->hostPtr;
+    }
+
+    /** Returns the buffer allocated through the CUDA runtime. */
+    T* getDevicePointer() {
+        return this->devPtr;
+    }
+
+    /** Copies all N elements from the host buffer to the device buffer (blocking cudaMemcpy). */
+    void send() {
+        CUDA_CALL(cudaMemcpy(this->devPtr, this->hostPtr, sizeof(T) * this->N, cudaMemcpyHostToDevice));
+    }
+
+    /** Copies all N elements from the device buffer to the host buffer (blocking cudaMemcpy). */
+    void sync() {
+        CUDA_CALL(cudaMemcpy(this->hostPtr, this->devPtr, sizeof(T) * this->N, cudaMemcpyDeviceToHost));
+    }
+
+    /** Unchecked access to element `idx` of the host buffer. */
+    T& operator[](size_t idx) {
+        return this->hostPtr[idx];
+    }
+
+private:
+    /**
+     * Returns a host buffer of `count` elements with every byte set to zero.
+     * calloc zeroes count * sizeof(T) bytes; the previous malloc + memset pair
+     * only cleared `count` bytes and never checked for a failed allocation.
+     */
+    static T* allocateZeroedHost(size_t count) {
+        T* buffer = static_cast<T*>(calloc(count, sizeof(T)));
+        // calloc(0, ...) may legitimately return nullptr, so only a non-empty
+        // request that yields nullptr is an error.
+        if (buffer == nullptr && count != 0) {
+            printf("Host allocation failed at %s:%i\n", __FILE__, __LINE__);
+            exit(-1);
+        }
+        return buffer;
+    }
+
+    T* hostPtr;
+    T* devPtr;
+    size_t N;
+};
+
+#endif //VERLET_CUDAMEMORY_CUH
--- a/definitions.cuh
+++ b/definitions.cuh
@ -0,0 +1,4 @@
+#pragma once
+
+// printf and exit are used by CUDA_CALL below.
+#include <cstdio>
+#include <cstdlib>
+
+#include <glm/vec2.hpp>
+
+// 2D single-precision vector used for positions, velocities and accelerations.
+typedef glm::vec<2, float, glm::defaultp> vec2;
+
+// Evaluates CALL exactly once; if the result is not cudaSuccess, prints the
+// source location together with the CUDA error string and terminates the
+// process with exit(-1).
+// The expansion is a brace-enclosed block, so the macro can be written with or
+// without a trailing semicolon. It is a statement, not an expression.
+#define CUDA_CALL(CALL) {                                                        \
+    cudaError_t cuda_call_status_ = (CALL);                                      \
+    if (cuda_call_status_ != cudaSuccess) {                                      \
+        printf("CUDA call failed at %s:%i: %s\n", __FILE__, __LINE__,            \
+               cudaGetErrorString(cuda_call_status_));                           \
+        exit(-1);                                                                \
+    }                                                                            \
+}
--- a/1
+++ b/1
@ -0,0 +1 @@
+Subproject commit efec5db081e3aad807d0731e172ac597f6a39447
--- a/main.cu
+++ b/main.cu
@ -0,0 +1,118 @@
+#include <iostream>
+#include "definitions.cuh"
+#include "particle.cuh"
+#include "cudaMemory.cuh"
+#include "sdlHelper.cuh"
+#include <glm/glm.hpp>
+#include <random>
+
+// Gravitational constant used in the force computation (simulation units).
+__constant__ const float G_CONSTANT = 1.0f;
+// Integration time step; float literal so no double is involved.
+__constant__ float dt = 0.1f;
+// Softening length for close encounters; 0 means no softening.
+__constant__ const float softening = 0.0f;
+// Device-side block size. It is the destination of a cudaMemcpyToSymbol from
+// the host, so it must not be const: a const symbol may be folded into device
+// code at compile time, making the runtime update invisible.
+__constant__ dim3 threadsPerBlock_gpu = dim3(1024);
+// Host-side default block size. Left non-const so host code may replace the
+// default with the device's actual limit; modifying a const object through a
+// cast is undefined behaviour.
+dim3 threadsPerBlock_cpu = dim3(1024);
+// Device-side copy of the grid size, filled in from the host with cudaMemcpyToSymbol.
+__constant__ dim3 n_blocksPerGridGpu;
+
+/**
+ * Accumulates into accels[i] the gravitational acceleration that particle j
+ * exerts on particle i, where j is the global index of the calling thread.
+ *
+ * @param particles device array of N particles (read only)
+ * @param accels    device array of N accelerations; only element i is updated,
+ *                  via atomicAdd, so it must hold the desired starting value
+ *                  (normally zero) before the launch
+ * @param N         number of particles
+ * @param i         index of the particle whose acceleration is accumulated
+ *
+ * Indexing uses blockIdx.x / threadIdx.x only, i.e. a 1D launch that covers at
+ * least N threads is assumed.
+ */
+__global__ void getAccels(Particle* particles, vec2* accels, size_t N, size_t i) {
+    // Widen before multiplying so the product cannot wrap in 32 bits.
+    const size_t j = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Threads past the end of the array and the self-interaction contribute nothing.
+    if (j >= N || j == i) {
+        return;
+    }
+
+    const vec2 self_position = particles[i].position;
+    const Particle other_particle = particles[j];
+
+    // Vector pointing from particle i towards particle j.
+    const float dx = other_particle.position.x - self_position.x;
+    const float dy = other_particle.position.y - self_position.y;
+
+    // Squared separation, softened so very close pairs do not blow up.
+    const float dist_sq = dx * dx + dy * dy + softening * softening;
+
+    // Two particles at exactly the same place (with zero softening) define no
+    // direction; skipping the pair avoids a 0/0 that would poison accels[i] with NaN.
+    if (dist_sq <= 0.0f) {
+        return;
+    }
+
+    // a_i = G * m_j * r_ij / |r_ij|^3. The acceleration points TOWARDS particle j
+    // (gravity attracts); the mass of particle i cancels out of F / m_i.
+    const float scale = G_CONSTANT * other_particle.mass / (dist_sq * sqrtf(dist_sq));
+
+    atomicAdd(&(accels[i].x), scale * dx);
+    atomicAdd(&(accels[i].y), scale * dy);
+}
+
+/**
+ * Advances every particle by one kick-drift-kick (velocity Verlet) step of
+ * length dt. One thread handles one particle.
+ *
+ * @param particles device array of N particles, updated in place
+ * @param accels    device array of N accelerations. On entry accels[id] is the
+ *                  acceleration used for the first half kick; on exit it holds
+ *                  the acceleration evaluated after the drift.
+ * @param N         number of particles
+ *
+ * Indexing uses blockIdx.x / threadIdx.x only, i.e. a 1D launch that covers at
+ * least N threads is assumed.
+ *
+ * The acceleration is summed directly in this thread. The earlier version
+ * launched one child grid per particle and then called __syncthreads(), which
+ * (a) does not wait for child grids, (b) sat inside the divergent `id < N`
+ * branch and (c) issued N nested launches per step, far more than the device
+ * runtime's pending-launch budget.
+ *
+ * NOTE(review): there is no grid-wide barrier between the drift and the force
+ * evaluation, so a thread may read a mix of old and already-drifted positions
+ * of other particles. Removing that needs the drift and the force evaluation
+ * in separate kernel launches (or a cooperative grid sync), which would change
+ * the host loop.
+ */
+__global__ void run_step(Particle* particles, vec2* accels, size_t N) {
+    // Widen before multiplying so the product cannot wrap in 32 bits.
+    const size_t id = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (id >= N) {
+        return;
+    }
+
+    Particle& cur_particle = particles[id];
+    const float half_dt = dt / 2.0f;
+
+    // First half kick with the acceleration stored for this particle.
+    cur_particle.velocity.x += accels[id].x * half_dt;
+    cur_particle.velocity.y += accels[id].y * half_dt;
+
+    // Drift.
+    cur_particle.position.x += cur_particle.velocity.x * dt;
+    cur_particle.position.y += cur_particle.velocity.y * dt;
+
+    // Sum the attraction of every other particle at the new position.
+    const float own_x = cur_particle.position.x;
+    const float own_y = cur_particle.position.y;
+    float accel_x = 0.0f;
+    float accel_y = 0.0f;
+
+    for (size_t j = 0; j < N; ++j) {
+        if (j == id) {
+            continue;
+        }
+
+        const float dx = particles[j].position.x - own_x;
+        const float dy = particles[j].position.y - own_y;
+        const float dist_sq = dx * dx + dy * dy + softening * softening;
+
+        // Coincident particles (with zero softening) define no direction; skip
+        // them instead of producing 0/0 = NaN.
+        if (dist_sq <= 0.0f) {
+            continue;
+        }
+
+        // a = G * m_j * r / |r|^3, pointing towards particle j.
+        const float scale = G_CONSTANT * particles[j].mass / (dist_sq * sqrtf(dist_sq));
+        accel_x += scale * dx;
+        accel_y += scale * dy;
+    }
+
+    accels[id].x = accel_x;
+    accels[id].y = accel_y;
+
+    // Second half kick with the freshly computed acceleration.
+    cur_particle.velocity.x += accel_x * half_dt;
+    cur_particle.velocity.y += accel_y * half_dt;
+}
+
+/**
+ * Entry point: creates N randomly initialised particles, uploads them, runs
+ * two run_step launches and hands the positions to the SDL helper after each.
+ *
+ * Every CUDA API call and kernel launch is checked; a failure prints the
+ * location and terminates the process (see CUDA_CALL).
+ */
+int main() {
+    sdlHelper display;
+
+    const size_t N = 20000;
+
+    // Direct initialisation: no temporary CudaMemory object is created or copied.
+    CudaMemory<Particle> particles(N);
+    CudaMemory<vec2> accelerations(N);
+
+    std::default_random_engine generator(std::random_device{}());
+    std::uniform_real_distribution<float> mass_distribution(1.0f, 100000.0f);
+    std::uniform_real_distribution<float> pos_distribution(-0.1f, 0.1f);
+    std::uniform_real_distribution<float> vel_distribution(-2.0f, 2.0f);
+
+    for (size_t i = 0; i < N; i++) {
+        accelerations[i] = {0, 0};
+        particles[i] = {
+                mass_distribution(generator),
+                {pos_distribution(generator), pos_distribution(generator)},
+                {vel_distribution(generator), vel_distribution(generator)}
+        };
+    }
+
+    // Query the block-size limit of device 0 into a real int. The earlier code
+    // wrote it through a cast into a const dim3, which is undefined behaviour.
+    int max_threads_per_block = 0;
+    CUDA_CALL(cudaDeviceGetAttribute(&max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, 0));
+    const dim3 threadsPerBlock(static_cast<unsigned int>(max_threads_per_block));
+
+    // Ceil-divide so the grid covers all N particles.
+    const dim3 n_blocksPerGrid(
+            static_cast<unsigned int>((N + threadsPerBlock.x - 1) / threadsPerBlock.x)
+    );
+
+    // dim3 members are unsigned, hence %u.
+    printf("n_blocksPerGrid: %u\n", n_blocksPerGrid.x);
+    printf("threadsPerBlock: %u\n", threadsPerBlock.x);
+
+    // Publish the launch configuration to the device-side symbols.
+    CUDA_CALL(cudaMemcpyToSymbol(n_blocksPerGridGpu, &n_blocksPerGrid, sizeof(dim3)));
+    CUDA_CALL(cudaMemcpyToSymbol(threadsPerBlock_gpu, &threadsPerBlock, sizeof(dim3)));
+
+    particles.send();
+    accelerations.send();
+
+    // NOTE(review): only two steps are simulated; this looks like a debugging
+    // limit, confirm the intended number of iterations.
+    for (int step = 0; step < 2; step++) {
+        run_step<<<n_blocksPerGrid, threadsPerBlock>>>(particles.getDevicePointer(), accelerations.getDevicePointer(), N);
+
+        // A kernel launch returns nothing: configuration errors show up in
+        // cudaGetLastError(), execution errors at the next synchronisation.
+        CUDA_CALL(cudaGetLastError());
+        CUDA_CALL(cudaDeviceSynchronize());
+
+        particles.sync();
+        // The host copy of the accelerations is never written after the
+        // initialisation above, so this re-uploads zeros and thereby clears
+        // the device-side accelerations before the next step.
+        accelerations.send();
+
+        if (!display.drawParticles(particles.getPointer(), N)) {
+            break;
+        }
+    }
+
+    CUDA_CALL(cudaDeviceSynchronize());
+
+    return 0;
+}
--- a/objects.def
+++ b/objects.def
--- a/particle.cu
+++ b/particle.cu
@ -0,0 +1,5 @@
+//
+// Created by kai on 02/02/23.
+//
+
+#include "particle.cuh"
--- a/particle.cuh
+++ b/particle.cuh
@ -0,0 +1,16 @@
+
+
+#ifndef VERLET_PARTICLE_CUH
+#define VERLET_PARTICLE_CUH
+
+#include "definitions.cuh"
+
+// One simulated body. Kept as a plain aggregate with exactly this member order:
+// main.cu brace-initialises it as {mass, {position}, {velocity}}.
+struct Particle {
+    float mass;     // mass of the body
+    vec2 position;  // current position
+    vec2 velocity;  // current velocity
+};
+
+
+#endif //VERLET_PARTICLE_CUH
--- a/sdlHelper.cu
+++ b/sdlHelper.cu
@ -0,0 +1,88 @@
+//
+// Created by kai on 05/02/23.
+//
+
+#include "sdlHelper.cuh"
+
+// Starts the render thread unless the NO_DISPLAY environment variable is set,
+// in which case the helper is only marked as not displaying.
+sdlHelper::sdlHelper() {
+    const bool headless = getenv("NO_DISPLAY") != nullptr;
+    if (headless) {
+        displaying = false;
+        return;
+    }
+
+    // Nothing more to do when an instance pointer is already recorded.
+    if (instance != nullptr) {
+        return;
+    }
+
+    instance = this;
+    renderThread = std::thread(&sdlHelper::renderThreadFunc, this);
+}
+
+// Stops and joins the render thread and shuts SDL down, but only for the
+// object that recorded itself as the instance in the constructor.
+sdlHelper::~sdlHelper() {
+    if (instance != this) {
+        return;
+    }
+
+    // Ask the render loop to finish, then wait for the thread to exit.
+    running = false;
+    renderThread.join();
+
+    SDL_Quit();
+}
+
+// True when v is neither NaN nor +/-infinity (v - v is 0 only for finite values).
+static bool isFiniteCoord(float v) {
+    return v - v == 0.0f;
+}
+
+/**
+ * Rebuilds the list of 2x2 pixel rectangles that the render thread draws, one
+ * per particle with finite coordinates.
+ *
+ * Positions are scaled so that the largest absolute coordinate lands at
+ * 512 / 1.1 pixels from the point (512, 512), i.e. inside the 1024x1024 window
+ * created by the render thread.
+ *
+ * @param particles     host array of `particleCount` particles
+ * @param particleCount number of particles in the array
+ * @return the current value of `running`; false once the render thread has
+ *         asked to stop
+ *
+ * NOTE(review): particleRects is rewritten here while the render thread may be
+ * iterating over it, and no lock protects it. Fixing that needs a mutex member
+ * in the class declaration.
+ */
+bool sdlHelper::drawParticles(Particle* particles, size_t particleCount) {
+    if (!this->displaying) return this->running;
+    this->particleRects.clear();
+    this->particleRects.reserve(particleCount);
+
+    // Largest absolute finite coordinate. Negative coordinates count as well:
+    // the previous std::max over the raw values ignored them.
+    float maxDistance = 0.0f;
+
+    for (size_t i = 0; i < particleCount; i++) {
+        const vec2 pos = particles[i].position;
+        if (!isFiniteCoord(pos.x) || !isFiniteCoord(pos.y)) {
+            continue;
+        }
+        maxDistance = std::max(maxDistance, std::max(pos.x, -pos.x));
+        maxDistance = std::max(maxDistance, std::max(pos.y, -pos.y));
+    }
+
+    // All particles at the origin (or no finite particle at all): use a unit
+    // extent instead of dividing by zero.
+    if (!(maxDistance > 0.0f)) {
+        maxDistance = 1.0f;
+    }
+
+    const float scale = 512.0f / (maxDistance * 1.1f);
+
+    for (size_t i = 0; i < particleCount; i++) {
+        const vec2 pos = particles[i].position;
+
+        // Converting NaN or infinity to int is undefined behaviour; such
+        // particles are simply not drawn.
+        if (!isFiniteCoord(pos.x) || !isFiniteCoord(pos.y)) {
+            continue;
+        }
+
+        SDL_Rect rect;
+        rect.x = static_cast<int>(pos.x * scale + 512.0f - 1.0f);
+        rect.y = static_cast<int>(pos.y * scale + 512.0f - 1.0f);
+        rect.w = 2;
+        rect.h = 2;
+        this->particleRects.push_back(rect);
+    }
+    return this->running;
+}
+
+// Returns the pointer stored in this object's `instance` member: the object
+// itself once its constructor started the render thread, nullptr otherwise.
+sdlHelper *sdlHelper::getInstance() {
+    return instance;
+}
+
+/**
+ * Body of the render thread: creates the window and renderer, then redraws
+ * the current particle rectangles until `running` becomes false (set by the
+ * destructor or by an SDL_QUIT event), and finally destroys both again.
+ *
+ * If the window or the renderer cannot be created, the SDL error is logged and
+ * `running` is cleared, so drawParticles() reports false to its caller instead
+ * of the loop passing null handles to SDL.
+ */
+void sdlHelper::renderThreadFunc() {
+    this->window = SDL_CreateWindow("cudaGravity", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 1024, 1024, SDL_WINDOW_VULKAN);
+    if (this->window == nullptr) {
+        SDL_Log("SDL_CreateWindow failed: %s", SDL_GetError());
+        running = false;
+        return;
+    }
+
+    this->renderer = SDL_CreateRenderer(this->window, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
+    if (this->renderer == nullptr) {
+        SDL_Log("SDL_CreateRenderer failed: %s", SDL_GetError());
+        SDL_DestroyWindow(this->window);
+        this->window = nullptr;
+        running = false;
+        return;
+    }
+
+    while (running) {
+        SDL_Event e;
+
+        // Drain pending events; closing the window ends the loop.
+        while (SDL_PollEvent(&e)) {
+            if (e.type == SDL_QUIT) {
+                running = false;
+            }
+        }
+
+        // Black background, white particles.
+        SDL_SetRenderDrawColor(this->renderer, 0, 0, 0, 255);
+        SDL_RenderClear(this->renderer);
+        SDL_SetRenderDrawColor(this->renderer, 255, 255, 255, 255);
+
+        for (auto & particleRect : particleRects) {
+            SDL_RenderFillRect(this->renderer, &particleRect);
+        }
+
+        SDL_RenderPresent(this->renderer);
+    }
+
+    SDL_DestroyRenderer(this->renderer);
+    this->renderer = nullptr;
+    SDL_DestroyWindow(this->window);
+    this->window = nullptr;
+}
--- a/sdlHelper.cuh
+++ b/sdlHelper.cuh
@ -0,0 +1,35 @@
+//
+// Created by kai on 05/02/23.
+//
+
+#ifndef VERLET_SDLHELPER_CUH
+#define VERLET_SDLHELPER_CUH
+
+#include <SDL2/SDL.h>
+#include <vector>
+#include <thread>
+#include "particle.cuh"
+
+class sdlHelper { // Shows particles as small rectangles in an SDL window that is drawn from a background thread.
+public:
+    sdlHelper(); // Starts the render thread unless the NO_DISPLAY environment variable is set.
+    ~sdlHelper(); // Stops and joins the render thread (if this object started it) and calls SDL_Quit.
+
+    bool drawParticles(Particle* particles, size_t particleCount); // Rebuilds particleRects from the particle positions; returns `running`.
+    sdlHelper* getInstance(); // Returns the `instance` member of this object.
+private:
+    void renderThreadFunc(); // Render-thread body: creates window/renderer, draws particleRects until `running` is false.
+
+    sdlHelper* instance = nullptr; // Per-object (non-static) pointer; set to `this` by the constructor when it starts the thread.
+    SDL_Window* window = nullptr; // Created and destroyed inside renderThreadFunc.
+    SDL_Renderer* renderer = nullptr; // Created and destroyed inside renderThreadFunc.
+    bool running = true; // Cleared by the destructor or on SDL_QUIT. NOTE(review): plain bool accessed from two threads.
+    bool displaying = true; // False when NO_DISPLAY is set; drawParticles then does nothing.
+
+    std::thread renderThread; // Runs renderThreadFunc; joined in the destructor.
+
+    std::vector<SDL_Rect> particleRects; // Written by drawParticles, read by the render thread. NOTE(review): no lock guards it.
+};
+
+
+#endif //VERLET_SDLHELPER_CUH
				`@ -0,0 +1 @@`
				`Subproject commit efec5db081e3aad807d0731e172ac597f6a39447`