// Opened by SAgiKPJH (1 year ago at the time this page was captured)
// 2023-04-13
// AND training - weights only
// nvcc -o "DeepLearning/Perceptron/Perceptron_Forward_Normal.cu" "DeepLearning/Perceptron/Perceptron_Forward_Normal.cu.cu" -lpng --expt-relaxed-constexpr -lcurand -lcuda -lcudart -lcublas
// "./DeepLearning/Perceptron/Perceptron_Forward_Normal.cu"
// Using CUDA library : -lcuda -lcudart -lcublas
// 1 Batch Size, 2 Model_count
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cstdlib>
#include <functional> // std::function
#include <iostream>
#include <regex>
#include <string>
#include <vector>
// Network topology constants.
// MODEL_LAYER lists the layer sizes; its first entry is taken as the input
// size and its last entry as the output size.
// These are constexpr (not plain const) so that the sizes read out of the
// array are guaranteed compile-time constants: they are used as array bounds
// on the host and are referenced directly from device code.
constexpr int MODEL_COUNT = 2;
constexpr int MODEL_LAYER[MODEL_COUNT] = {2, 1}; // change only the input size
constexpr int INPUT_SIZE = MODEL_LAYER[0];
constexpr int OUTPUT_SIZE = MODEL_LAYER[MODEL_COUNT - 1];
constexpr int BATCH_SIZE = 1;
// Forward pass of a single-output perceptron, one thread per batch sample.
//
// Launch layout: 1-D grid of 1-D blocks. The thread with global index s
// processes sample s; threads with s >= BATCH_SIZE return immediately, so the
// launch may contain more threads than samples. No shared memory is used.
//
// inputs      : device buffer read as BATCH_SIZE rows of INPUT_SIZE floats
// weights     : device buffer; the first INPUT_SIZE entries are used
// bias        : device buffer; only bias[0] is used
// output      : device buffer; output[s] receives the weighted sum of sample s
// MODEL_LAYER : not used by this kernel (shadows the file-level constant)
// MODEL_COUNT : not used by this kernel (shadows the file-level constant)
__global__ void forward_kernel(float* inputs, float* weights, float *bias, float* output, int* MODEL_LAYER, int* MODEL_COUNT)
{
    (void)MODEL_LAYER;
    (void)MODEL_COUNT;

    const int sample = blockIdx.x * blockDim.x + threadIdx.x;
    // Guard the grid tail: surplus threads must not touch memory.
    if (sample >= BATCH_SIZE) {
        return;
    }

    // Dot product over every input feature instead of a hard-coded pair, so
    // the kernel stays correct when INPUT_SIZE is changed.
    const float* row = inputs + sample * INPUT_SIZE;
    float sum = 0.0f;
    for (int k = 0; k < INPUT_SIZE; ++k) {
        sum += row[k] * weights[k];
    }
    sum += bias[0];

    // Index by the global sample id (not threadIdx.x) so that results from
    // different blocks do not overwrite each other.
    output[sample] = sum;
}
// Returns the total number of weights of a fully connected network whose
// layer sizes are array[0..count-1], i.e. the sum of products of adjacent
// layer sizes.
// Example: [2, 3, 4, 5, 1] -> 2*3 + 3*4 + 4*5 + 5*1 = 43
//
// array : layer sizes; every entry must be positive
// count : number of entries in array
// Returns 0 when there are fewer than two layers.
// If any layer size is <= 0, prints "MODEL_LAYER Setting Error" and
// terminates the process with EXIT_FAILURE.
int array_weight_count(const int *array, int count) {
    // Validate every layer size first, including the last one.
    for (int i = 0; i < count; i++) {
        if (array[i] <= 0) {
            std::cout << "MODEL_LAYER Setting Error" << std::endl;
            exit(EXIT_FAILURE);
        }
    }

    // Only count-1 adjacent pairs exist; stopping at i + 1 < count keeps
    // array[i + 1] inside the array.
    int sum = 0;
    for (int i = 0; i + 1 < count; i++) {
        sum += array[i] * array[i + 1];
    }
    return sum;
}
// Returns the total number of biases of a network whose layer sizes are
// array[0..count-1]: the sum of every layer size except the first (input)
// one.
// Example: [2, 3, 4, 5, 1] -> 3 + 4 + 5 + 1 = 13
//
// array : layer sizes
// count : number of entries in array
// Returns 0 when there are fewer than two layers.
// If any summed layer size is <= 0, prints "MODEL_LAYER Setting Error" and
// terminates the process with EXIT_FAILURE.
int array_bias_count(const int *array, int count) {
    int sum = 0;
    for (int i = 1; i < count; i++) {
        // Check the entry being summed; array[count] would be one past the end.
        if (array[i] <= 0) {
            std::cout << "MODEL_LAYER Setting Error" << std::endl;
            exit(EXIT_FAILURE);
        }
        sum += array[i];
    }
    return sum;
}
// Runs f and prints how long it took, measured with CUDA events.
// Usage: cuda_timmer("Execution time", []() { /* code to run */ });
//
// msg : label printed in front of the measurement
// f   : callable to time; invoked exactly once
//
// Both events are recorded on stream 0, and the function blocks until the
// stop event has completed before reading the elapsed time.
void cuda_timmer(const std::string& msg, std::function<void()> f) {
    cudaEvent_t start, stop;
    float elapsedTime = 0.0f;

    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, 0);
    f();
    cudaEventRecord(stop, 0);

    // Wait for the stop event so the elapsed time is valid.
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsedTime, start, stop);

    // Release the events; without this every call leaks two event handles.
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    std::cout << msg << " Elapsed time: " << elapsedTime << " ms" << std::endl;
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
// status : return code of the CUDA call
// what   : short description of the call, included in the message
static void check_cuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        std::cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << std::endl;
        exit(EXIT_FAILURE);
    }
}

// Runs one forward pass of the perceptron on zero-initialized inputs,
// weights and biases, and prints the result for each batch sample.
int main()
{
    // Initialize inputs, weights and biases.
    float inputs[BATCH_SIZE][INPUT_SIZE] = { 0.0f };
    const int weight_count = array_weight_count(MODEL_LAYER, MODEL_COUNT);
    const int bias_count = array_bias_count(MODEL_LAYER, MODEL_COUNT);
    // The counts are only known at run time, so use std::vector rather than
    // variable-length stack arrays (which are not standard C++).
    std::vector<float> weights(weight_count, 0.0f);
    // initialize_weight(weights);
    std::vector<float> bias(bias_count, 0.0f);
    // initialize_bias(bias);

    // Host array that receives the output values.
    float output[BATCH_SIZE] = {0.0f};

    // Allocate device memory.
    float *d_inputs = nullptr, *d_output = nullptr, *d_weights = nullptr, *d_bias = nullptr;
    check_cuda(cudaMalloc(&d_inputs, BATCH_SIZE * INPUT_SIZE * sizeof(float)), "cudaMalloc d_inputs");
    check_cuda(cudaMalloc(&d_output, BATCH_SIZE * OUTPUT_SIZE * sizeof(float)), "cudaMalloc d_output");
    check_cuda(cudaMalloc(&d_weights, weight_count * sizeof(float)), "cudaMalloc d_weights");
    check_cuda(cudaMalloc(&d_bias, bias_count * sizeof(float)), "cudaMalloc d_bias");

    // Copy the input data to the GPU.
    check_cuda(cudaMemcpy(d_inputs, inputs, BATCH_SIZE * INPUT_SIZE * sizeof(float), cudaMemcpyHostToDevice), "cudaMemcpy inputs");
    check_cuda(cudaMemcpy(d_weights, weights.data(), weight_count * sizeof(float), cudaMemcpyHostToDevice), "cudaMemcpy weights");
    check_cuda(cudaMemcpy(d_bias, bias.data(), bias_count * sizeof(float), cudaMemcpyHostToDevice), "cudaMemcpy bias");

    // Copy the model parameters to the GPU.
    int* d_model_layer = nullptr;
    int* d_model_count = nullptr;
    check_cuda(cudaMalloc(&d_model_layer, MODEL_COUNT * sizeof(int)), "cudaMalloc d_model_layer");
    check_cuda(cudaMalloc(&d_model_count, sizeof(int)), "cudaMalloc d_model_count");
    check_cuda(cudaMemcpy(d_model_layer, MODEL_LAYER, MODEL_COUNT * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy model layer");
    check_cuda(cudaMemcpy(d_model_count, &MODEL_COUNT, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy model count");

    // Launch the CUDA kernel: a single block with one thread per sample.
    forward_kernel<<<1, BATCH_SIZE>>>(d_inputs, d_weights, d_bias, d_output, d_model_layer, d_model_count);
    // A launch does not return a status; launch-configuration errors are
    // only visible through cudaGetLastError().
    check_cuda(cudaGetLastError(), "forward_kernel launch");

    // Copy the output back to the host. This blocking copy also surfaces
    // errors raised while the kernel was executing.
    check_cuda(cudaMemcpy(output, d_output, BATCH_SIZE * sizeof(float), cudaMemcpyDeviceToHost), "cudaMemcpy output");

    // Print the results.
    for (int i = 0; i < BATCH_SIZE; i++) {
        std::cout << "AND(" << inputs[i][0] << ", " << inputs[i][1] << ") = " << output[i] << std::endl;
    }

    // Release every device allocation made above.
    check_cuda(cudaFree(d_inputs), "cudaFree d_inputs");
    check_cuda(cudaFree(d_output), "cudaFree d_output");
    check_cuda(cudaFree(d_weights), "cudaFree d_weights");
    check_cuda(cudaFree(d_bias), "cudaFree d_bias");
    check_cuda(cudaFree(d_model_layer), "cudaFree d_model_layer");
    check_cuda(cudaFree(d_model_count), "cudaFree d_model_count");

    return 0;
}