snorkel-team / snorkel

A system for quickly generating training data with weak supervision
https://snorkel.org

tensorflow ....InvalidArgumentError: indices[0,1] = 2055 is not in [0, 256) #1560

Closed: frenomer closed this issue 4 years ago

frenomer commented 4 years ago

Hi, I am trying to run the following cell:

```python
# Imports implied by the snippet; N_BATCH, N_LABEL, N_TREE, N_LEAF, DEPTH and
# the helpers init_weights, init_prob_weights and model are defined earlier
# in the script.
import numpy as np
import tensorflow as tf


class TFRandomForestTrain:
    def __init__(self, session):
        self.session = session

        # Input X, output Y
        self.X = tf.placeholder("float", [N_BATCH, 8, 8, 1], name='X')
        self.Y = tf.placeholder("float", [N_BATCH, N_LABEL], name='Y')

        ##################################################
        # Initialize network weights
        ##################################################
        self.w = init_weights([3, 3, 1, 32])
        self.w2 = init_weights([3, 3, 32, 64])
        self.w3 = init_weights([3, 3, 64, 128])

        self.w4_ensemble = []
        self.w_d_ensemble = []
        self.w_l_ensemble = []
        for i in range(N_TREE):
            self.w4_ensemble.append(init_weights([128 * 4 * 4, 625]))
            self.w_d_ensemble.append(init_prob_weights([625, N_LEAF], -1, 1))
            self.w_l_ensemble.append(init_prob_weights([N_LEAF, N_LABEL], -2, 2))

        self.p_keep_conv = tf.placeholder("float")
        self.p_keep_hidden = tf.placeholder("float")

        ##################################################
        # Define a fully differentiable deep-ndf
        ##################################################
        # With the probability decision_p, route a sample to the right branch
        self.decision_p_e, self.leaf_p_e = model(self.X, self.w, self.w2, self.w3,
                                                 self.w4_ensemble, self.w_d_ensemble,
                                                 self.w_l_ensemble, self.p_keep_conv,
                                                 self.p_keep_hidden)

        self.flat_decision_p_e = []

        # Iterate over each tree
        for decision_p in self.decision_p_e:
            # Compute the complement of d, which is 1 - d,
            # where d is the sigmoid of the fully connected output
            decision_p_comp = tf.subtract(tf.ones_like(decision_p), decision_p)

            # Pack both d and 1 - d
            decision_p_pack = tf.stack([decision_p, decision_p_comp])

            # Flatten/vectorize the decision probabilities for efficient indexing
            self.flat_decision_p = tf.reshape(decision_p_pack, [-1])
            self.flat_decision_p_e.append(self.flat_decision_p)

        in_repeat = N_LEAF / 2
        out_repeat = N_BATCH

        batch_0_indices = \
            tf.tile(tf.expand_dims(tf.range(0, N_BATCH * N_LEAF, N_LEAF), 1),
                    [1, N_LEAF])

        batch_complement_indices = \
            np.array([[0] * int(in_repeat), [N_BATCH * N_LEAF] * int(in_repeat)]
                     * out_repeat).reshape(N_BATCH, N_LEAF)

        # First define the routing probabilities d for root nodes
        self.mu_e = []

        # indices_var = Variable((batch_0_indices + batch_complement_indices))

        # Iterate over each tree
        for i, flat_decision_p in enumerate(self.flat_decision_p_e):
            mu = tf.gather(flat_decision_p,
                           tf.add(batch_0_indices, batch_complement_indices))
            self.mu_e.append(mu)

        # From the second layer to the last layer, we make the decision nodes
        for d in range(1, DEPTH + 1):
            indices = tf.range(2 ** d, 2 ** (d + 1)) - 1
            tile_indices = tf.reshape(tf.tile(tf.expand_dims(indices, 1),
                                              [1, 2 ** (DEPTH - d + 1)]), [1, -1])
            batch_indices = tf.add(batch_0_indices,
                                   tf.tile(tile_indices, [N_BATCH, 1]))

            in_repeat = in_repeat / 2
            out_repeat = out_repeat * 2

            # Again define the indices that pick d and 1 - d for the node
            batch_complement_indices = \
                np.array([[0] * int(in_repeat), [N_BATCH * N_LEAF] * int(in_repeat)]
                         * out_repeat).reshape(N_BATCH, N_LEAF)

            mu_e_update = []
            for mu, flat_decision_p in zip(self.mu_e, self.flat_decision_p_e):
                mu = tf.multiply(mu, tf.gather(flat_decision_p,
                                               tf.add(batch_indices,
                                                      batch_complement_indices)))
                mu_e_update.append(mu)

            self.mu_e = mu_e_update

        ##################################################
        # Define p(y|x)
        ##################################################
        self.py_x_e = []
        for mu, leaf_p in zip(self.mu_e, self.leaf_p_e):
            # Average over all the leaf p
            py_x_tree = tf.reduce_mean(
                tf.multiply(tf.tile(tf.expand_dims(mu, 2), [1, 1, N_LABEL]),
                            tf.tile(tf.expand_dims(leaf_p, 0), [N_BATCH, 1, 1])), 1)
            self.py_x_e.append(py_x_tree)

        self.py_x_e = tf.stack(self.py_x_e)
        self.py_x = tf.reduce_mean(self.py_x_e, 0)
```
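For context on where the bound in the error comes from: each `tf.gather` above indexes into `flat_decision_p`, so every entry of `batch_indices + batch_complement_indices` must be smaller than its flattened length. Here is a standalone sketch of that invariant with hypothetical toy constants (assuming, as in common deep-ndf implementations, `N_LEAF = 2 ** (DEPTH + 1)`):

```python
import numpy as np

# Toy stand-ins, chosen only to illustrate the invariant; they are NOT the
# values from the script above.
DEPTH = 3
N_LEAF = 2 ** (DEPTH + 1)
N_BATCH = 8

# flat_decision_p = reshape([d, 1 - d], [-1]) has this many entries per tree:
flat_len = 2 * N_BATCH * N_LEAF

# Largest index the routing scheme can produce:
max_index = ((N_BATCH - 1) * N_LEAF          # last row of batch_0_indices
             + N_BATCH * N_LEAF              # offset into the 1 - d half
             + 2 ** (DEPTH + 1) - 2)         # deepest tile_indices entry
print(max_index, flat_len)                   # 254 256
assert max_index < flat_len  # if this fails, tf.gather raises InvalidArgumentError
```

If the tensor fed to `tf.gather` is shorter than this scheme assumes, the error reported below is exactly what TensorFlow prints.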

Load the dataset:

```python
# Imports implied by the snippet (defined elsewhere in the original script):
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from keras.utils import np_utils
import numpy as np

dataset = read_csv('C:/Users/Benji/Desktop/dataset/kddcup99.csv', header=0,
                   index_col=0, encoding='unicode_escape')
dataset = dataset.values

X = dataset[:, :-1]
y = dataset[:, -1]

scaler = RobustScaler()  # alternative: PowerTransformer(method='yeo-johnson')
# scaler = MinMaxScaler(feature_range=(0, 1))
Xb = scaler.fit_transform(X)
# print(len(Xb[0]))

# Pad each row out to 64 features and reshape into 8x8x1 "images"
data_X = np.pad(Xb, ((0, 0), (0, 64 - len(Xb[0]))), 'constant').reshape(-1, 8, 8, 1)

encoder = LabelEncoder().fit(y)
encoded_Y = encoder.transform(y)

# Convert integers to dummy variables (i.e. one-hot encoded)
data_y = np_utils.to_categorical(encoded_Y)

trX, teX, trY, teY = train_test_split(data_X, data_y, test_size=0.3)
```
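As a quick sanity check on the pad-and-reshape step, here is a minimal sketch with a hypothetical toy matrix standing in for `Xb` (the standard KDD Cup 99 dump has 41 feature columns once the label is dropped):

```python
import numpy as np

# Hypothetical toy features standing in for the scaled KDD Cup 99 matrix
Xb = np.random.rand(10, 41)

# Pad each row out to 64 values, then view it as an 8x8 single-channel image
padded = np.pad(Xb, ((0, 0), (0, 64 - Xb.shape[1])), 'constant')
data_X = padded.reshape(-1, 8, 8, 1)
assert data_X.shape == (10, 8, 8, 1)
```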

And I am getting this error:

```
InvalidArgumentError: indices[0,1] = 2055 is not in [0, 256)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/Benji/PycharmProjects/Code/NEWWORK6.py", line 292, in <module>
    classifier.fit(trX=trX, trY=trY, teX=teX, teY=teY)
  File "C:/Users/Benji/PycharmProjects/Code/NEWWORK6.py", line 229, in fit
    self.p_keep_hidden: tr_p_keep_hidden})
  File "C:\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\client\session.py", line 956, in run
    run_metadata_ptr)
  File "C:\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\client\session.py", line 1180, in _run
    feed_dict_tensor, options, run_metadata)
  File "C:\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\client\session.py", line 1359, in _do_run
    run_metadata)
  File "C:\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\client\session.py", line 1384, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[0,1] = 2055 is not in [0, 256)
    [[node GatherV2_16 (defined at \Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\framework\ops.py:1748) ]]

Original stack trace for 'GatherV2_16':
  File "/Users/Benji/PycharmProjects/Code/NEWWORK6.py", line 291, in <module>
    classifier = TFRandomForestTrain(session=sess)
  File "/Users/Benji/PycharmProjects/Code/NEWWORK6.py", line 190, in __init__
    tf.add(batch_indices, batch_complement_indices)))
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\util\dispatch.py", line 180, in wrapper
    return target(*args, **kwargs)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 3956, in gather
    params, indices, axis, name=name)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 4081, in gather_v2
    batch_dims=batch_dims, name=name)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 794, in _apply_op_helper
    op_def=op_def)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3357, in create_op
    attrs, op_def, compute_device)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3426, in _create_op_internal
    op_def=op_def)
  File "\Users\Benji\Anaconda2\envs\ben\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()
```
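For reference, the failing node is a plain out-of-range `tf.gather`; a minimal TF 1.x sketch (toy tensors, unrelated to the script above) reproduces the same class of error:

```python
import tensorflow as tf  # TF 1.x graph mode, matching the traceback

params = tf.constant([0.1, 0.2, 0.3, 0.4])  # 4 entries, so valid indices are [0, 4)
indices = tf.constant([[0, 5]])             # indices[0,1] = 5 is out of range

with tf.Session() as sess:
    sess.run(tf.gather(params, indices))
    # tensorflow.python.framework.errors_impl.InvalidArgumentError:
    #   indices[0,1] = 5 is not in [0, 4)
```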

henryre commented 4 years ago

Hi @frenomer, thanks for posting. From looking at the post you provided, I can't tell how this issue is related to Snorkel. Let us know if you can provide more detail.