Zhehui-Huang / quad-swarm-rl

Additional environments compatible with OpenAI gym
118 stars 44 forks source link

sim2real c code problem #49

Open sAz-G opened 11 months ago

sAz-G commented 11 months ago

I had sim2real.py generate the C code for network evaluation, but I am a bit confused about the calculation.

When I run the python script, the following c code is produced (I deleted the weights to keep this issue shorter)


#include <random>
#include <vector>
#include <iostream>
#include <algorithm>
#include <cstring> 

/*
 * Result record of one network evaluation: four scalar values that
 * networkEvaluate() fills from the first four outputs of the last layer.
 * NOTE(review): presumably one thrust command per motor -- confirm.
 */
struct control_t_n {
    float thrust_0;
    float thrust_1;
    float thrust_2;
    float thrust_3;
};
typedef struct control_t_n control_t_n;

/* Runs the network on state_array and stores the four results in *control_n. */
void networkEvaluate(control_t_n *control_n, const float *state_array);

// Size constants emitted with the network. None of them is referenced by the
// code visible in this listing.
// NOTE(review): presumably the neighbor count and the per-neighbor
// observation width -- confirm against the generator.
static const int NEIGHBORS = 6;
static const int NBR_DIM = 6; 

// NOTE(review): presumably the obstacle count and the per-obstacle
// observation width -- confirm against the generator.
static const int NUM_OBSTACLES = 2; 
static const int OBST_DIM = 9;

/* Identity activation: hands the input back unchanged. */
float linear(float num)
{
    const float passthrough = num;
    return passthrough;
}

// Logistic sigmoid activation: evaluates 1 / (1 + e^(-num)).
// NOTE(review): no math header appears among the visible #include lines, so
// which `exp` declaration is picked up here (and therefore the precision of
// the intermediate arithmetic) cannot be determined from this listing --
// confirm before relying on exact values.
float sigmoid(float num) {
    return 1 / (1 + exp(-num));
}

/*
 * Rectified linear activation: strictly positive inputs pass through
 * unchanged, every other input yields 0.
 */
float relu(float num)
{
    return (num > 0) ? num : 0;
}

// Per-layer {input width, output width} pairs; networkEvaluate() uses row k
// as the loop bounds of the k-th layer it evaluates.
static const int structure [6][2] = {{6, 8},{8, 8},{18, 16},{16, 16},{24, 32},{32, 4}};
// Scratch buffers for layer activations. networkEvaluate() as shown only
// touches output_0 .. output_5; output_6 .. output_11 are not referenced in
// the visible code.
// NOTE(review): output_5 holds 8 floats although structure[5][1] is 4, and
// the sizes of output_5 .. output_11 (8, 8, 16, 16, 32, 1, 4) match the sizes
// of the critic_* / action_parameterization bias arrays in this file --
// presumably one buffer was emitted per bias array rather than per evaluated
// layer; confirm against the generator.
static float output_0[8];
static float output_1[8];
static float output_2[16];
static float output_3[16];
static float output_4[32];
static float output_5[8];
static float output_6[8];
static float output_7[16];
static float output_8[16];
static float output_9[32];
static float output_10[1];
static float output_11[4];

// Network parameters. The initializers were removed by the issue author to
// keep the listing short, so only the array shapes are visible.
// Weight matrices are indexed [input][output], matching how networkEvaluate()
// reads them.
static const float actor_encoder_neighbor_encoder_embedding_mlp_0_weight[6][8];
static const float actor_encoder_neighbor_encoder_embedding_mlp_2_weight[8][8];
static const float actor_encoder_self_encoder_0_weight[18][16];
static const float actor_encoder_self_encoder_2_weight[16][16];
static const float actor_encoder_feed_forward_0_weight[24][32];
static const float action_parameterization_distribution_linear_weight[32][4];
// Bias vectors. networkEvaluate() as shown reads the five actor_* biases and
// critic_encoder_neighbor_encoder_embedding_mlp_0_bias; the remaining
// critic_* biases and action_parameterization_distribution_linear_bias are
// not referenced in the visible code.
// NOTE(review): critic_* biases are declared although no critic_* weight
// matrix is -- presumably the generator filtered weights and biases
// differently; confirm.
static const float actor_encoder_neighbor_encoder_embedding_mlp_0_bias[8];
static const float actor_encoder_neighbor_encoder_embedding_mlp_2_bias[8];
static const float actor_encoder_self_encoder_0_bias[16];
static const float actor_encoder_self_encoder_2_bias[16];
static const float actor_encoder_feed_forward_0_bias[32];
static const float critic_encoder_neighbor_encoder_embedding_mlp_0_bias[8];
static const float critic_encoder_neighbor_encoder_embedding_mlp_2_bias[8];
static const float critic_encoder_self_encoder_0_bias[16];
static const float critic_encoder_self_encoder_2_bias[16];
static const float critic_encoder_feed_forward_0_bias[32];
static const float critic_linear_bias[1];
static const float action_parameterization_distribution_linear_bias[4];

// Evaluates the generated network: runs six dense layers one after another,
// each reading the previous layer's buffer, and copies the first four values
// of the last buffer into *control_n.
//   control_n   - receives output_5[0..3] as thrust_0 .. thrust_3
//   state_array - network input; only elements 0 .. structure[0][0]-1 are read
// Each layer computes out[i] = sum_j in[j] * W[j][i] + b[i]; layers 0-4 then
// apply tanhf, the last layer applies no activation.
void networkEvaluate(struct control_t_n *control_n, const float *state_array) {
        // Layer 0: state_array -> output_0.
        for (int i = 0; i < structure[0][1]; i++) {
            output_0[i] = 0;
            for (int j = 0; j < structure[0][0]; j++) {
                output_0[i] += state_array[j] * actor_encoder_neighbor_encoder_embedding_mlp_0_weight[j][i];
            }
            output_0[i] += actor_encoder_neighbor_encoder_embedding_mlp_0_bias[i];
            output_0[i] = tanhf(output_0[i]);
        }

        // Layer 1: output_0 -> output_1.
        for (int i = 0; i < structure[1][1]; i++) {
            output_1[i] = 0;
            for (int j = 0; j < structure[1][0]; j++) {
                output_1[i] += output_0[j] * actor_encoder_neighbor_encoder_embedding_mlp_2_weight[j][i];
            }
            output_1[i] += actor_encoder_neighbor_encoder_embedding_mlp_2_bias[i];
            output_1[i] = tanhf(output_1[i]);
        }

        // Layer 2: output_1 -> output_2.
        // NOTE(review): structure[2][0] is 18 but output_1 holds only 8 floats,
        // so the inner loop indexes past the end of output_1.
        for (int i = 0; i < structure[2][1]; i++) {
            output_2[i] = 0;
            for (int j = 0; j < structure[2][0]; j++) {
                output_2[i] += output_1[j] * actor_encoder_self_encoder_0_weight[j][i];
            }
            output_2[i] += actor_encoder_self_encoder_0_bias[i];
            output_2[i] = tanhf(output_2[i]);
        }

        // Layer 3: output_2 -> output_3.
        for (int i = 0; i < structure[3][1]; i++) {
            output_3[i] = 0;
            for (int j = 0; j < structure[3][0]; j++) {
                output_3[i] += output_2[j] * actor_encoder_self_encoder_2_weight[j][i];
            }
            output_3[i] += actor_encoder_self_encoder_2_bias[i];
            output_3[i] = tanhf(output_3[i]);
        }

        // Layer 4: output_3 -> output_4.
        // NOTE(review): structure[4][0] is 24 but output_3 holds only 16 floats,
        // so the inner loop indexes past the end of output_3.
        for (int i = 0; i < structure[4][1]; i++) {
            output_4[i] = 0;
            for (int j = 0; j < structure[4][0]; j++) {
                output_4[i] += output_3[j] * actor_encoder_feed_forward_0_weight[j][i];
            }
            output_4[i] += actor_encoder_feed_forward_0_bias[i];
            output_4[i] = tanhf(output_4[i]);
        }

                // Layer 5: output_4 -> output_5, no activation.
                // NOTE(review): the bias added below is
                // critic_encoder_neighbor_encoder_embedding_mlp_0_bias, not
                // action_parameterization_distribution_linear_bias, which is the
                // array that pairs by name with the weight matrix used here --
                // confirm which one is intended.
                for (int i = 0; i < structure[5][1]; i++) {
                    output_5[i] = 0;
                    for (int j = 0; j < structure[5][0]; j++) {
                        output_5[i] += output_4[j] * action_parameterization_distribution_linear_weight[j][i];
                    }
                    output_5[i] += critic_encoder_neighbor_encoder_embedding_mlp_0_bias[i];
                }

            // Hand the four last-layer values back to the caller.
            control_n->thrust_0 = output_5[0];
            control_n->thrust_1 = output_5[1];
            control_n->thrust_2 = output_5[2];
            control_n->thrust_3 = output_5[3];  
    }

/*
 * Wrapper around networkEvaluate(): evaluates the network on indatav and
 * writes the four resulting values to outdatav[0..3].
 *
 *   indatav  - input values forwarded unchanged to networkEvaluate()
 *   size     - not used by this function
 *   outdatav - must have room for at least four floats
 *
 * Always returns EXIT_SUCCESS.
 *
 * NOTE(review): this is not one of the standard `main` signatures --
 * presumably the function is called by an external harness rather than used
 * as the program's startup routine; confirm.
 */
int main(const float *indatav, size_t size, float *outdatav)
{
    (void)size; /* explicitly unused; kept for interface compatibility */

    control_t_n motorThrusts;
    networkEvaluate(&motorThrusts, indatav);

    outdatav[0] = motorThrusts.thrust_0;
    outdatav[1] = motorThrusts.thrust_1;
    outdatav[2] = motorThrusts.thrust_2;
    outdatav[3] = motorThrusts.thrust_3;
    return EXIT_SUCCESS;
}

The following for loop tries to access an element that exceeds the array size (output_3 has dimension 16 but structure[4][0] is 24).

   for (int j = 0; j < structure[4][0]; j++) {
                output_4[i] += output_3[j] * actor_encoder_feed_forward_0_weight[j][i];
            }

Also, shouldn't there be an array of size 24 that stores the outputs of the self encoder and the neighbor encoder stacked together, to serve as the input of the feed-forward layer? (This assumes the self encoder has 16 output neurons and the neighbor encoder has 8.)

I am wondering whether these are problems with my network's structure or rather a problem with the script sim2real.py.

yang-zj1026 commented 7 months ago

Hi,

Sorry that I only just saw this. It seems that you're generating C code for the multi-drone deepset model. If so, I recommend using the code provided here. Additionally, if your project involves multi-drone obstacle avoidance, you might consider trying our single-head attention model. It has been tested and proven effective on Crazyflies.

AI4IS commented 6 months ago

How do I run the NN model to control a Crazyflie? Are there any guides?