felixchenfy / Realtime-Action-Recognition

Apply ML to the skeletons from OpenPose; 9 actions; multiple people. (WARNING: I'm sorry that this is only good for a course demo, not for real-world applications!!! Those are very difficult!!!)
MIT License

Converting to RNN #91

Open vidit2011998 opened 1 year ago

vidit2011998 commented 1 year ago

Hello, I tried converting it to RNN:

class ClassifierOfflineTrain(object):
    ''' The classifier for offline training.
    The input features to this classifier are already processed by class FeatureGenerator.
    '''

def __init__(self):
    self._init_all_models()

    # self.clf = self._choose_model("Nearest Neighbors")
    # self.clf = self._choose_model("Linear SVM")
    # self.clf = self._choose_model("RBF SVM")
    # self.clf = self._choose_model("Gaussian Process")
    # self.clf = self._choose_model("Decision Tree")
    # self.clf = self._choose_model("Random Forest")
    # self.clf = self._choose_model("Neural Net")
    self.clf = self._choose_model("RNN")
    #self.clf = self._choose_model("Neural Net")

def predict(self, X):
    ''' Predict the class index of the feature X '''
    X = self.pca.transform(X)
    X = X.reshape(X.shape[0],1, X.shape[1])
    Y_predict = self.clf.predict(X)
    return Y_predict

def predict_and_evaluate(self, te_X, te_Y):
    ''' Test model on test set and obtain accuracy '''
    te_Y_predict = self.predict(te_X)
    # Convert one-hot encoded predictions to labels
    te_Y_predict = np.argmax(te_Y_predict, axis=1)
    N = len(te_Y)
    n = sum(te_Y_predict == te_Y)
    accu = n / N
    return accu, te_Y_predict

def train(self, X, Y):
    ''' Train model. The result is saved into self.clf '''
    n_components = min(NUM_FEATURES_FROM_PCA, X.shape[1])
    self.pca = PCA(n_components=n_components, whiten=True)
    self.pca.fit(X)
    print("Sum eig values:", np.sum(self.pca.explained_variance_ratio_))
    X_new = self.pca.transform(X)
    X_new = X_new.reshape(X_new.shape[0],1, X_new.shape[1])
    print("After PCA, X.shape = ", X_new.shape)
    self.clf.fit(X_new, Y, epochs = 100)

def _choose_model(self, name):
    self.model_name = name
    idx = self.names.index(name)
    return self.classifiers[idx]

def _init_all_models(self):
    self.names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
                  "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
                  "Naive Bayes", "QDA", "RNN"]
    self.model_name = None
    self.classifiers = [
        KNeighborsClassifier(5),
        SVC(kernel="linear", C=10.0),
        SVC(gamma=0.01, C=1.0, verbose=True),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(
            max_depth=30, n_estimators=100, max_features="auto"),
        MLPClassifier((20, 30, 40)),  # Neural Net
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis(),
        self._init_rnn_model()]  # RNN

def _init_rnn_model(self):
    ''' Initialize a simple RNN model '''
    model = Sequential()
    model.add(LSTM(314, return_sequences=True, input_shape=(None, NUM_FEATURES_FROM_PCA)))
    model.add(LSTM(314))
    model.add(Dense(4, activation="softmax"))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    #model.fit(X_train, y_train, epochs=30)
    print(model.summary())
    return model

def _predict_proba(self, X):
    ''' Predict the probability of feature X belonging to each of the class Y[i] '''
    #Y_probs = self.clf.predict_proba(self.pca.transform(X))
    Y_probs = self.clf.predict_proba(X)
    return Y_probs  # np.array with a length of len(classes)

class FeatureGenerator(object):

def __init__(self, window_size, is_adding_noise=False):
    '''
    Arguments:
        window_size {int}: Number of adjacent frames for extracting features.
        is_adding_noise {bool}: Is adding noise to the joint positions and scale.
        noise_intensity {float}: The noise relative to the body height.
    '''
    self._window_size = window_size
    self._is_adding_noise = is_adding_noise
    self._noise_intensity = NOISE_INTENSITY
    self.reset()

def reset(self):
    ''' Reset the FeatureGenerator '''
    self._x_deque = deque(maxlen=self._window_size)
    self._angles_deque = deque(maxlen=self._window_size)
    self._lens_deque = deque(maxlen=self._window_size)
    self._pre_x = None

def add_curr_skeletons(self, skeleton):
    '''
    Stack the input skeleton with previous skeletons, up to the window size.
    If the window size has not been reached, the output will be None.

    Returns:
        is_success {bool}: Return the feature only when
            the historical input skeletons are more than self._window_size.
        features {np.array}
    '''

    x = retrain_only_body_joints(skeleton)

    if not ProcFtr.has_neck_and_thigh(x):
        self.reset()
        return False, None
    else:
        ''' The input skeleton has a neck and at least one thigh '''
        # -- Preprocess x
        # Fill zeros, compute angles/lens
        x = self._fill_invalid_data(x)
        if self._is_adding_noise:
            # Add noise during training stage to augment data
            x = self._add_noises(x, self._noise_intensity)
        x = np.array(x)

        # Push to deque
        self._x_deque.append(x)

        self._pre_x = x.copy()

        # -- Stack skeletons into a feature
        if len(self._x_deque) < self._window_size:
            return False, None
        else:
            # Stack all skeletons in the deque into a single feature vector
            features = np.concatenate(list(self._x_deque))

            return True, features.copy()

class ClassifierOnlineTest(object):
    ''' Classifier for online inference.
    The input data to this classifier is the raw skeleton data, so it is
    processed by class FeatureGenerator before being sent to the self.model
    trained by class ClassifierOfflineTrain.
    '''

def __init__(self, model_path, action_labels, window_size, human_id=0):

    # -- Settings
    self.model = None
    self.human_id = human_id
    # load architecture
    with open('/home/goyal/Documents/pose2/Realtime-Action-Recognition/model/model_architecture.json', 'r') as f:
        self.model = model_from_json(f.read())

    # load weights
    self.model.load_weights('/home/goyal/Documents/pose2/Realtime-Action-Recognition/model/model_weights.h5')
    print("post load successful")

    if self.model is None:
        print("my Error: failed to load model")
        assert False
    self.action_labels = action_labels
    self.THRESHOLD_SCORE_FOR_DISP = 0.5

    # -- Time series storage
    self.feature_generator = FeatureGenerator(window_size)
    self.reset()

def reset(self):
    self.feature_generator.reset()
    self.scores_hist = deque()
    self.scores = None

def predict(self, skeleton):
    ''' Predict the class (string) of the input raw skeleton '''
    LABEL_UNKNOWN = ""
    is_features_good, features = self.feature_generator.stack_skeletons(
        skeleton)

    if is_features_good:
        # convert to 2d array
        print(f"1. Shape of input features: {np.array(features).shape}")
        features = features.reshape(1,1,-1)
        print(f"2. Shape of input features: {np.array(features).shape}")

        curr_scores = np.argmax(self.model.predict(features), axis = -1)               <------------------I made change here
        self.scores = self.smooth_scores(curr_scores)

        if self.scores.max() < self.THRESHOLD_SCORE_FOR_DISP:  # If lower than threshold, bad
            prediced_label = LABEL_UNKNOWN
        else:
            predicted_idx = self.scores.argmax()
            prediced_label = self.action_labels[predicted_idx]
    else:
        prediced_label = LABEL_UNKNOWN
    return prediced_label

As you said, in add_curr_skeletons I just stack the skeletons, but then the feature shape becomes (130,) for a window size of 5. When I train the RNN model with 50 PCA dimensions, it raises an error saying it expected an input of dimension (50,) but got (130,). Could you please suggest where I am going wrong?
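To show the numbers concretely, here is a tiny standalone sketch of the shapes I am describing (dummy data; 26 values per frame is just what 130 / 5 implies, not necessarily the real feature layout):

    import numpy as np

    WINDOW_SIZE = 5
    PER_FRAME_DIMS = 26              # implied by 130 / 5
    NUM_FEATURES_FROM_PCA = 50

    # What my add_curr_skeletons produces: window_size stacked frames -> one flat vector
    stacked = np.concatenate([np.random.rand(PER_FRAME_DIMS) for _ in range(WINDOW_SIZE)])
    print(stacked.shape)             # (130,)

    # What the LSTM was built for: input_shape=(None, NUM_FEATURES_FROM_PCA),
    # i.e. 50 values per timestep, which only exist after pca.transform()
    print((1, 1, NUM_FEATURES_FROM_PCA))   # a single 50-dim timestep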
abel-mesfin commented 10 months ago

It seems like the issue is related to the input dimensions of the RNN model during training and testing. In your train method, you are reshaping the input features to have the shape (batch_size, 1, num_features) before training the RNN model. However, during the prediction phase in the predict method of the ClassifierOnlineTest class, you are reshaping the features to have the shape (1, 1, -1).

To resolve this issue, make sure that the input dimensions during training and testing are consistent. In the train method, you are reshaping the input features using PCA, and in the _init_rnn_model method, you set input_shape=(None, NUM_FEATURES_FROM_PCA) for the first LSTM layer. Therefore, the correct input shape for the RNN model during training is (batch_size, timesteps, features).
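To make that layout concrete, here is a tiny standalone sketch of the (batch_size, timesteps, features) convention for a Keras LSTM, using tf.keras and dummy numbers (5 timesteps, 50 features per step, 9 action classes as in this repo):

    import numpy as np
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense

    TIMESTEPS = 5        # e.g. the skeleton window size
    N_FEATURES = 50      # features carried by each timestep
    N_CLASSES = 9        # number of action labels

    model = Sequential([
        LSTM(64, input_shape=(TIMESTEPS, N_FEATURES)),
        Dense(N_CLASSES, activation="softmax"),
    ])
    model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

    X = np.random.rand(32, TIMESTEPS, N_FEATURES)    # (batch, timesteps, features)
    y = np.random.randint(0, N_CLASSES, size=(32,))  # integer labels for the sparse loss
    model.fit(X, y, epochs=1, verbose=0)
    print(model.predict(X[:1]).shape)                # (1, 9): one probability per class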

Here's a suggested modification to your code:

Update the train method in the ClassifierOfflineTrain class to reshape the input features to have the shape (batch_size, window_size, num_features):

def train(self, X, Y):
    n_components = min(NUM_FEATURES_FROM_PCA, X.shape[1])
    self.pca = PCA(n_components=n_components, whiten=True)
    self.pca.fit(X)
    print("Sum eig values:", np.sum(self.pca.explained_variance_ratio_))
    X_new = self.pca.transform(X)

    # Reshape input features to match the RNN input shape:
    # (batch_size, window_size, features_per_timestep).
    # NOTE: this assumes self._window_size is available here (e.g. passed to
    # __init__) and that n_components is divisible by the window size.
    X_new = X_new.reshape(X_new.shape[0], self._window_size, -1)

    print("After PCA, X.shape = ", X_new.shape)
    self.clf.fit(X_new, Y, epochs=100)

Update the predict method in the ClassifierOnlineTest class to reshape the input features consistently:

def predict(self, skeleton):
    # ... (existing code)

    if is_features_good:
        # Reshape consistently with training: (batch, timesteps, features).
        # NOTE: assumes window_size is stored on the instance
        # (e.g. as self._window_size in __init__).
        features = features.reshape(1, self._window_size, -1)
        print(f"Shape of input features: {np.array(features).shape}")

        curr_scores = np.argmax(self.model.predict(features), axis=-1)
        self.scores = self.smooth_scores(curr_scores)

    # ... (existing code)

By ensuring consistent reshaping of input features, you should be able to resolve the dimension mismatch issue during prediction. Does that help?
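As an extra sanity check (a sketch only; prepare_features and the dummy PCA fit below are illustrative, not part of the repo), you can funnel both the training windows and the live window through one helper, so the transform and the reshape can never diverge:

    import numpy as np
    from sklearn.decomposition import PCA

    WINDOW_SIZE = 5
    PER_FRAME_DIMS = 26        # hypothetical per-frame size (130 / 5)
    N_COMPONENTS = 50          # NUM_FEATURES_FROM_PCA; note 50 is divisible by 5

    def prepare_features(raw_windows, pca, window_size):
        ''' Apply the same PCA transform and reshape for training and inference.
        raw_windows: 2-D array of flat stacked windows,
                     shape (n_samples, window_size * per_frame_dims) '''
        X = pca.transform(raw_windows)                    # (n_samples, n_components)
        return X.reshape(X.shape[0], window_size, -1)     # (n_samples, timesteps, features)

    pca = PCA(n_components=N_COMPONENTS, whiten=True)
    train_windows = np.random.rand(200, WINDOW_SIZE * PER_FRAME_DIMS)
    pca.fit(train_windows)

    print(prepare_features(train_windows, pca, WINDOW_SIZE).shape)   # (200, 5, 10)
    live_window = np.random.rand(1, WINDOW_SIZE * PER_FRAME_DIMS)
    print(prepare_features(live_window, pca, WINDOW_SIZE).shape)     # (1, 5, 10)

Whatever shape this helper prints is the shape the LSTM's input_shape has to be built against.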