JinwoongKim / cs231n


assignment1-2 #2

Open JinwoongKim opened 6 years ago

JinwoongKim commented 6 years ago

svm_loss_naive

SVM loss function: \( L_i = \sum_{j \neq y_i} \max(0,\, s_j - s_{y_i} + \Delta) \)

  for i in xrange(num_train):
    scores = X[i].dot(W) # scores to classes e.g., [ 3.2, 1.3, 2.2] for cat, car, frog
    score_yi = scores[y[i]] # score for input e.g., 3.2 if X[i] is cat
    for j in xrange(num_classes):
      if j == y[i]:
        continue
      loss += max(0,scores[j] - score_yi + 1) 

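For reference, the analytic gradient that the code below implements follows directly from this loss (the indicator \(\mathbb{1}\{\cdot\}\) is 1 when the margin is positive, 0 otherwise):

    \nabla_{w_j} L_i = \mathbb{1}\{ s_j - s_{y_i} + \Delta > 0 \}\, x_i \qquad (j \neq y_i)
    \nabla_{w_{y_i}} L_i = -\Big( \sum_{j \neq y_i} \mathbb{1}\{ s_j - s_{y_i} + \Delta > 0 \} \Big)\, x_i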

  dW = np.zeros(W.shape) # initialize the gradient as zero
  # compute the loss and the gradient
  num_classes = W.shape[1]
  num_train = X.shape[0]
  loss = 0.0
  for i in xrange(num_train):
    scores = X[i].dot(W)
    score_yi = scores[y[i]]
    number_of_loss_contributor = 0
    for j in xrange(num_classes):
      if j == y[i]:
        continue
      margin = scores[j] - score_yi + 1 # note delta = 1
      if margin > 0:
        loss += margin
        dW[:, j] += X[i, :]   # each positive margin adds x_i to the j-th column
        number_of_loss_contributor += 1
    dW[:, y[i]] += (number_of_loss_contributor * -X[i])  # correct class: subtract x_i once per positive margin

  # Right now the loss is a sum over all training examples, but we want it
  # to be an average instead so we divide by num_train.
  loss /= num_train
  dW /= num_train

  # Add regularization to the loss.
  loss += reg * np.sum(W * W)
  dW += 2 * reg * W

  return loss, dW
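
A quick sanity check for this gradient is a centered finite-difference probe on a few random entries of W. This is only an illustrative sketch on toy shapes (the X, y, W below are made up for the example and assume svm_loss_naive is importable as written):

    import numpy as np

    # toy problem: 5 samples, 4 features, 3 classes
    np.random.seed(0)
    X = np.random.randn(5, 4)
    y = np.random.randint(3, size=5)
    W = np.random.randn(4, 3) * 0.01

    loss, dW = svm_loss_naive(W, X, y, reg=0.1)

    h = 1e-5
    for _ in range(5):
        ix = tuple(np.random.randint(s) for s in W.shape)
        W[ix] += h
        loss_plus, _ = svm_loss_naive(W, X, y, reg=0.1)
        W[ix] -= 2 * h
        loss_minus, _ = svm_loss_naive(W, X, y, reg=0.1)
        W[ix] += h                                      # restore W
        numeric = (loss_plus - loss_minus) / (2 * h)
        print(ix, 'numeric:', numeric, 'analytic:', dW[ix])

The numeric and analytic values should agree closely for every probed entry.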


JinwoongKim commented 6 years ago

Stochastic Gradient Descent

def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
      batch_size=200, verbose=False):
        ....      
      # Sample batch_size elements from the training data and their
      # corresponding labels to use in this round of gradient descent.
      choices = np.random.choice(X.shape[0], batch_size)  # sampling with replacement (np.random.choice's default)
      # np.take without an axis argument indexes the flattened array, so plain
      # fancy indexing is used instead:
      #X_batch = np.take(X, choices)
      #y_batch = np.take(y, choices)
      X_batch = X[choices]
      y_batch = y[choices]

      # evaluate loss and gradient
      loss, grad = self.loss(X_batch, y_batch, reg)
      loss_history.append(loss)

      # perform parameter update
      # Update the weights using the gradient and the learning rate. 
      self.W += -(learning_rate * grad)


def predict(self, X):
    """
    Use the trained weights of this linear classifier to predict labels for
    data points.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.

    Returns:
    - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
      array of length N, and each element is an integer giving the predicted
      class.
    """
    y_pred = np.zeros(X.shape[0])
    # Store the predicted labels in y_pred.
    pred_scores = X @ self.W                    # (N, C) class scores
    y_pred = np.argmax(pred_scores, axis=1)     # highest-scoring class per row

    return y_pred

Print-out of the scores for the first input and the resulting prediction; index 6 holds the largest score (1.41), hence the predicted class 6:

    print(pred_scores[0])
    print(y_pred[0])
[-1.52662675  0.06893752 -0.61177559  0.71873429  0.57737909  0.88258464
  1.41015005  0.54151162 -1.50519876 -0.55446969]
6
JinwoongKim commented 6 years ago

loss

def loss(self, X_batch, y_batch, reg):
    """
    Compute the loss function and its derivative.
    Subclasses will override this.

    Inputs:
    - X_batch: A numpy array of shape (N, D) containing a minibatch of N
      data points; each point has dimension D.
    - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
    - reg: (float) regularization strength.

    Returns: A tuple containing:
    - loss as a single float
    - gradient with respect to self.W; an array of the same shape as W
    """
    # NOTE: subclasses such as LinearSVM (below) override this method, so this
    # body is never reached during training; a freshly constructed LinearSVM
    # here would have an uninitialized self.W.
    linear_svm = LinearSVM()
    loss, grad = linear_svm.loss(X_batch, y_batch, reg)
    return loss, grad


class LinearSVM(LinearClassifier):
  """ A subclass that uses the Multiclass SVM loss function """

  def loss(self, X_batch, y_batch, reg):
    return svm_loss_naive(self.W, X_batch, y_batch, reg)
    #return svm_loss_vectorized(self.W, X_batch, y_batch, reg)
JinwoongKim commented 6 years ago
learning_rates = [1e-7, 5e-5, 2e-7]            # grid assumed from the notebook; order matches the runs below
regularization_strengths = [2.5e4, 5e4]
results = {}
best_val = -1                                  # best validation accuracy seen so far
best_svm = None                                # the LinearSVM that achieved it

for lr in learning_rates:
    for reg in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=1500, verbose=True)
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        train_accuracy = np.mean(y_train == y_train_pred)
        val_accuracy = np.mean(y_val == y_val_pred)
        print('train accuracy : ', train_accuracy)
        print('val accuracy : ', val_accuracy)
        results[(lr, reg)] = (train_accuracy, val_accuracy)
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_svm = svm
            print('best_val : ', best_val)
            print('best_svm : ', best_svm)
JinwoongKim commented 6 years ago
iteration 0 / 1500: loss 787.232326
iteration 100 / 1500: loss 284.831832
iteration 200 / 1500: loss 107.276729
iteration 300 / 1500: loss 42.209800
iteration 400 / 1500: loss 18.944271
iteration 500 / 1500: loss 10.454847
iteration 600 / 1500: loss 7.223457
iteration 700 / 1500: loss 5.568888
iteration 800 / 1500: loss 5.224346
iteration 900 / 1500: loss 6.168793
iteration 1000 / 1500: loss 5.490408
iteration 1100 / 1500: loss 4.980247
iteration 1200 / 1500: loss 5.699592
iteration 1300 / 1500: loss 4.823028
iteration 1400 / 1500: loss 5.250684
train accuracy :  0.36493877551
val accuracy :  0.367
best_val :  0.367
best_svm :  <cs231n.classifiers.linear_classifier.LinearSVM object at 0x11b0af748>
iteration 0 / 1500: loss 1569.498526
iteration 100 / 1500: loss 211.988487
iteration 200 / 1500: loss 32.513436
iteration 300 / 1500: loss 9.231102
iteration 400 / 1500: loss 6.877568
iteration 500 / 1500: loss 5.801491
iteration 600 / 1500: loss 5.865757
iteration 700 / 1500: loss 5.501307
iteration 800 / 1500: loss 5.761493
iteration 900 / 1500: loss 5.325307
iteration 1000 / 1500: loss 5.451506
iteration 1100 / 1500: loss 5.193314
iteration 1200 / 1500: loss 5.728135
iteration 1300 / 1500: loss 5.690538
iteration 1400 / 1500: loss 6.110446
train accuracy :  0.356265306122
val accuracy :  0.365
iteration 0 / 1500: loss 798.680433
iteration 100 / 1500: loss 485598970957923960228398820659321372672.000000
iteration 200 / 1500: loss 80265620775463014551175707362150406500508054566095606933766083080653635584.000000
iteration 300 / 1500: loss 13267264273154056173371952815482616768041306531406696979348938900867306491346612173489970785061436414173904896.000000
iteration 400 / 1500: loss 2192972527878573578052990624796076938742342577444713473126484513853806461810660113679487290627617429603565370211154220775405206474100496796745728.000000
iteration 500 / 1500: loss 362480795514210072249120265111840946384868830265077635837943916254457218570107126815791658453165748807079899609080415568746798714205820423445437594030662331335417241273771027857408.000000
iteration 600 / 1500: loss 59915172418379633278322554655856798529407954013739780319387725756003763550682145685432342325613342027510589403494138408992722350487802849944760266475440752955249788039709734664429305031390722523487404499253250228224.000000
iteration 700 / 1500: loss 9903498144864975608163360884457789014061435588664854113721044964232471064777899202281585628488426231303113877151056589441491753662932990505616515878444445064644757830152557866471230700588763127461392690064313458220827763786783253214070538320480305152.000000
iteration 800 / 1500: loss 1636968927010166179454153199572564519030293020174226962642670852526951634629080998686703256570806727018928825720528519249019990339642129526932161870697508482476265124438338597364459352784462955838901192627681138712170504363809963329070870528859870815505612049814247204695478502894338048.000000
/Users/jwkim/Downloads/assignment1/cs231n/classifiers/linear_svm.py:48: RuntimeWarning: overflow encountered in double_scalars
  loss += reg * np.sum(W * W)
/Users/jwkim/Downloads/assignment1/cs231n/classifiers/linear_svm.py:48: RuntimeWarning: overflow encountered in multiply
  loss += reg * np.sum(W * W)

iteration 900 / 1500: loss inf
iteration 1000 / 1500: loss inf
iteration 1100 / 1500: loss inf
iteration 1200 / 1500: loss inf
iteration 1300 / 1500: loss inf
iteration 1400 / 1500: loss inf
train accuracy :  0.0726530612245
val accuracy :  0.062
iteration 0 / 1500: loss 1552.773192
iteration 100 / 1500: loss 4253799210516307611318728161617030569178057409777554142647532115322353601908807176373292086511348338848907478818419474694144.000000
iteration 200 / 1500: loss 10984372492761923336403691926469334653342364230383279160457836184068386587257395547112838022322015675513104885732145675589223330808088367246903863510141557136887986528056307075942082593402917742563176666507637497629502005752982997455240265990144.000000
iteration 300 / 1500: loss inf
iteration 400 / 1500: loss inf
iteration 500 / 1500: loss inf
iteration 600 / 1500: loss nan
iteration 700 / 1500: loss nan
iteration 800 / 1500: loss nan
iteration 900 / 1500: loss nan
iteration 1000 / 1500: loss nan
iteration 1100 / 1500: loss nan
iteration 1200 / 1500: loss nan
iteration 1300 / 1500: loss nan
iteration 1400 / 1500: loss nan
train accuracy :  0.100265306122
val accuracy :  0.087
iteration 0 / 1500: loss 786.196962
iteration 100 / 1500: loss 107.560517
iteration 200 / 1500: loss 18.744186
iteration 300 / 1500: loss 7.318506
iteration 400 / 1500: loss 5.460269
iteration 500 / 1500: loss 5.407416
iteration 600 / 1500: loss 4.963164
iteration 700 / 1500: loss 5.607739
iteration 800 / 1500: loss 5.404160
iteration 900 / 1500: loss 5.553899
iteration 1000 / 1500: loss 5.334320
iteration 1100 / 1500: loss 5.305546
iteration 1200 / 1500: loss 5.138996
iteration 1300 / 1500: loss 5.271654
iteration 1400 / 1500: loss 5.465743
train accuracy :  0.363367346939
val accuracy :  0.361
iteration 0 / 1500: loss 1548.406195
iteration 100 / 1500: loss 32.982621
iteration 200 / 1500: loss 5.995286
iteration 300 / 1500: loss 5.870776
iteration 400 / 1500: loss 5.801156
iteration 500 / 1500: loss 5.135486
iteration 600 / 1500: loss 5.834212
iteration 700 / 1500: loss 5.902708
iteration 800 / 1500: loss 5.951288
iteration 900 / 1500: loss 5.699322
iteration 1000 / 1500: loss 6.041425
iteration 1100 / 1500: loss 5.674803
iteration 1200 / 1500: loss 5.843442
iteration 1300 / 1500: loss 5.362872
iteration 1400 / 1500: loss 5.681511
train accuracy :  0.34006122449
val accuracy :  0.352
lr 1.000000e-07 reg 2.500000e+04 train accuracy: 0.364939 val accuracy: 0.367000
lr 1.000000e-07 reg 5.000000e+04 train accuracy: 0.356265 val accuracy: 0.365000
lr 2.000000e-07 reg 2.500000e+04 train accuracy: 0.363367 val accuracy: 0.361000
lr 2.000000e-07 reg 5.000000e+04 train accuracy: 0.340061 val accuracy: 0.352000
lr 5.000000e-05 reg 2.500000e+04 train accuracy: 0.072653 val accuracy: 0.062000
lr 5.000000e-05 reg 5.000000e+04 train accuracy: 0.100265 val accuracy: 0.087000
best validation accuracy achieved during cross-validation: 0.367000
/Users/jwkim/Downloads/assignment1/cs231n/classifiers/linear_svm.py:37: RuntimeWarning: overflow encountered in double_scalars
  loss += margin
/Users/jwkim/Downloads/assignment1/cs231n/classifiers/linear_svm.py:49: RuntimeWarning: overflow encountered in multiply
  dW += 2 * reg * W
/Users/jwkim/Downloads/assignment1/cs231n/classifiers/linear_classifier.py:72: RuntimeWarning: invalid value encountered in add
  self.W += -(learning_rate * grad)
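
The two lr = 5e-5 runs diverge: each step overshoots, W grows without bound, the reg * np.sum(W * W) term overflows (hence the warnings above), and the loss becomes inf/nan. A possible guard, not in the original notebook, is to abandon a setting once its loss stops being finite; a sketch assuming train returns the loss_history it accumulates:

    # inside the (lr, reg) loop above
    loss_hist = svm.train(X_train, y_train, learning_rate=lr, reg=reg,
                          num_iters=1500, verbose=True)
    if not np.isfinite(loss_hist[-1]):
        print('diverged for lr=%e, reg=%e -- skipping' % (lr, reg))
        continue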
JinwoongKim commented 6 years ago

def svm_loss_vectorized(W, X, y, reg):

  scores = X @ W                                       # (N, C) class scores
  # correct-class score for each row, with the +1 margin already folded in
  scores_y = np.array([np.choose(y, scores.T) - 1, ])
  # every j == y[i] entry contributes max(0, 1) = 1, so subtract N afterwards
  loss = np.sum(np.maximum(0, scores - scores_y.T)) - X.shape[0]
  loss = loss / (X.shape[0]) + (reg * np.sum(W * W))
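
The comment stops at the vectorized loss; the gradient can be vectorized with the same margin matrix. A sketch of what the complete function could look like (an indicator mask replaces the two loops; variable names here are my own, not from the original):

    import numpy as np

    def svm_loss_vectorized(W, X, y, reg):
        num_train = X.shape[0]
        scores = X @ W                                   # (N, C)
        correct = scores[np.arange(num_train), y]        # (N,) correct-class scores
        margins = np.maximum(0, scores - correct[:, None] + 1)
        margins[np.arange(num_train), y] = 0             # no loss for the correct class
        loss = margins.sum() / num_train + reg * np.sum(W * W)

        # 1 where a margin is positive; the correct-class entry of each row is set
        # to minus the number of positive margins, mirroring the naive loop
        mask = (margins > 0).astype(X.dtype)
        mask[np.arange(num_train), y] = -mask.sum(axis=1)
        dW = X.T @ mask / num_train + 2 * reg * W
        return loss, dW

On the same batch this should match svm_loss_naive's loss and dW up to floating-point error.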