bethgelab / foolbox

A Python toolbox to create adversarial examples that fool neural networks in PyTorch, TensorFlow, and JAX
https://foolbox.jonasrauber.de
MIT License
2.77k stars 426 forks source link

FGSM limit for small max_epsilon #245

Closed Fluffigpegasus closed 5 years ago

Fluffigpegasus commented 5 years ago

FGSM attack: when we change max_epsilon from 1 to 0.01, it gives us this error. Is there a lower limit on how small max_epsilon can be?

whatsapp image 2018-12-04 at 16 41 35

# The attack call from the report, with max_epsilon lowered to 0.01.
adversarial = attack(train_input1, 1,max_epsilon=0.01,epsilons=10)
wielandbrendel commented 5 years ago

If your epsilon is too small, the attack cannot find an adversarial, in which case it returns None.

Daisy1996926 commented 5 years ago

Thank you for replying!

But our pixel value range is unusual: it is (-255, 0).

# Attack: wrap the TensorFlow graph in a foolbox model with bounds (-255, 0),
# restore the trained checkpoint into the model's session and print the class
# the model predicts for one input.
with foolbox.models.TensorFlowModel(image_holder, logits,(-255,0)) as model:
    saver.restore(model.session, "save7/formal_train7.ckpt")
    print(np.argmax(model.predictions(train_input1)))

Also, the accuracy we get under the FGSM attack is 0.000, which is not normal for an attack. image In the picture, 0 is the number of images whose predicted label and adversarial label are the same (which means the attack failed and the model defended against it), and 151 is the total number of inputs minus the images that our original model misclassified.

I am confused about the meaning of max_epsilon and epsilons; my guess is that they are related to the pixel value range (which is normally (0, 1) in your examples). Could you please take a look at our code and see whether you can spot an obvious problem that would explain this abnormal accuracy?

#-*- coding:utf-8 -*-
from skimage import transform,data,io
#import numpy as np
import numpy as np
#import random
import tensorflow as tf
#import time
import os
import shutil
import foolbox
from foolbox.models import TensorFlowModel
from foolbox.criteria import Misclassification
from foolbox.attacks import FGSM

def is_chinese(uchar):
    """Return True when *uchar* lies in the range U+4E00..U+9FA6 (Chinese characters)."""
    return u'\u4e00' <= uchar <= u'\u9fa6'
# Read the character table and keep only the Chinese characters, in file order.
# The `with` block closes the file handle, which the bare open() never did.
with open('characters.txt',encoding = 'utf-8') as f:
    a=list(f.read())
characters=[ch for ch in a if is_chinese(ch)]

from PIL import Image
import matplotlib.pyplot as plt # plt is used to display images
import matplotlib.image as mpimg # mpimg is used to read images

# Scratch directory for the preprocessed character matrices.
# NOTE(review): os.mkdir raises FileExistsError if the directory already exists,
# e.g. when an earlier run stopped before the script removed it again.
os.mkdir('apply_save7_matrix')
# This script recognises the images stored in apply_save7_png.

def eachFile(filepath,despath):
    """Convert each image in *filepath* into a centred 96x96 integer matrix in *despath*.

    Every directory entry except ``.DS_Store`` is processed in sorted order:
    it is read as greyscale, cropped to the rows and columns that contain at
    least one pixel different from 1, rescaled so that its longer side matches
    the 96-pixel canvas, transformed with ``(value - 1) * 255``, cast to int and
    pasted into the middle of a zero-filled 96x96 array.  The array is written
    with ``np.save`` under the entry's name minus its last four characters.

    Args:
        filepath: Directory holding the source images.
        despath: Directory that receives the saved matrices.

    Raises:
        IndexError: If an image has no pixel different from 1.
    """
    canvas_side = 96
    for entry in sorted(os.listdir(filepath)):
        if entry == '.DS_Store':
            continue
        glyph = io.imread(filepath+'/'+entry,as_grey=True)
        height, width = glyph.shape

        # A row or column consisting solely of the value 1 is margin; keep the
        # span from the first to the last row/column that contains anything else.
        is_white = glyph == 1
        inked_rows = np.flatnonzero(~is_white.all(axis=1))
        inked_cols = np.flatnonzero(~is_white.all(axis=0))
        top, bottom = inked_rows[0], inked_rows[-1]
        left, right = inked_cols[0], inked_cols[-1]
        glyph = glyph[top:bottom+1, left:right+1]
        height, width = glyph.shape

        # Scale by the longer side so the result fits the canvas.
        scaled = transform.rescale(glyph, canvas_side / max(height, width))
        pixels = ((np.array(scaled) - 1) * 255).astype(int)

        # Paste the glyph centred on a zero-filled canvas.
        canvas = np.zeros([canvas_side, canvas_side], dtype=int)
        dims = pixels.shape
        centre = canvas_side / 2
        canvas[int(centre - dims[0]/2):int(centre + dims[0]/2),
               int(centre - dims[1]/2):int(centre + dims[1]/2)] = pixels

        np.save(despath+'/'+entry[:-4], canvas)

# Convert every image in apply_save7_png into a saved matrix under apply_save7_matrix.
eachFile('apply_save7_png','apply_save7_matrix')

# Running layer index: the layer builders read it to suffix their scope and
# variable names and increment it after each convolution they create.
conv_num = 1
# Width of the final logits layer.
char_num = 3755
def variable_with_weight_loss(shape,stddev,w1):
    """Create a truncated-normal weight variable, optionally with an L2 penalty.

    Args:
        shape: Shape of the variable.
        stddev: Standard deviation of the truncated-normal initialiser.
        w1: Multiplier for the variable's L2 loss; when it is not None the
            scaled loss is added to the 'losses' collection.

    Returns:
        The new variable, named 'weights' plus the current ``conv_num``.
    """
    initial = tf.truncated_normal(shape, stddev=stddev)
    var = tf.Variable(initial, name='weights' + str(conv_num))
    if w1 is None:
        return var
    penalty = tf.multiply(tf.nn.l2_loss(var), w1, name='weight_loss')
    tf.add_to_collection('losses', penalty)
    return var

# File names of the saved matrices and the matching relative paths.
train_dir1 = os.listdir('apply_save7_matrix')
train_dir = ['apply_save7_matrix/' + name for name in train_dir1]

# One loaded array per file, in the same order as train_dir1.
train_input = [np.load(path) for path in train_dir]

# Labels: the first five characters of each file name, parsed as an integer.
test_label = [int(name[:5]) for name in train_dir1]

batch_size = 1
size_max = 96
keep_prob = 1

# Graph inputs: a batch of single-channel size_max x size_max images and their labels.
image_holder = tf.placeholder(tf.float32, [batch_size, size_max, size_max, 1])
label_holder = tf.placeholder(tf.int32, [batch_size])

def build_conv(images,w_shape,k_stride,b_value,b_shape):
    """Add a convolution + bias + ReLU layer and advance the global layer counter.

    Args:
        images: Input tensor passed to ``tf.nn.conv2d``.
        w_shape: Shape of the filter variable.
        k_stride: Strides passed to ``tf.nn.conv2d``.
        b_value: Constant the bias variable is initialised with.
        b_shape: Shape of the bias variable.

    Returns:
        The ReLU-activated output tensor.
    """
    global conv_num
    tag = str(conv_num)
    with tf.name_scope('conv' + tag):
        filters = tf.Variable(tf.truncated_normal(w_shape, stddev=5e-2), name='weights' + tag)
        convolved = tf.nn.conv2d(images, filters, k_stride, padding='SAME')
        offsets = tf.Variable(tf.constant(b_value, shape=b_shape), name='bias' + tag)
        activated = tf.nn.relu(tf.nn.bias_add(convolved, offsets), name='conv' + tag)

    conv_num += 1
    return activated

def build_conv_no_relu(images,w_shape,k_stride,b_value,b_shape):
    """Add a convolution + bias layer without activation and advance the layer counter.

    Args:
        images: Input tensor passed to ``tf.nn.conv2d``.
        w_shape: Shape of the filter variable.
        k_stride: Strides passed to ``tf.nn.conv2d``.
        b_value: Constant the bias variable is initialised with.
        b_shape: Shape of the bias variable.

    Returns:
        The bias-added convolution output, with no ReLU applied.
    """
    global conv_num
    tag = str(conv_num)
    with tf.name_scope('conv' + tag):
        filters = tf.Variable(tf.truncated_normal(w_shape, stddev=5e-2), name='weights' + tag)
        convolved = tf.nn.conv2d(images, filters, k_stride, padding='SAME')
        offsets = tf.Variable(tf.constant(b_value, shape=b_shape), name='bias' + tag)
        linear_out = tf.nn.bias_add(convolved, offsets, name='conv' + tag)

    conv_num += 1
    return linear_out
# Builds a block whose convolutions all keep the same dimensions.
def build_same_block(images,w_shape,b_value,b_shape,layer_num,keep_prob):
    """Stack ``layer_num`` stride-1 convolutions and add a skip connection.

    The first convolution is followed by ReLU.  Every further convolution is
    followed by ReLU except the last one, whose bias-added output is summed
    with ``tf.nn.dropout(images, keep_prob)`` and then passed through ReLU.
    The global ``conv_num`` counter is advanced once per convolution.

    Args:
        images: Input tensor; also the skip-connection branch.
        w_shape: Filter shape shared by every convolution in the block.
        b_value: Constant each bias variable is initialised with.
        b_shape: Shape of each bias variable.
        layer_num: Number of convolutions in the block; must be at least 2.
        keep_prob: Keep probability passed to ``tf.nn.dropout`` on the skip branch.

    Returns:
        The ReLU-activated sum of the last convolution and the skip branch.

    Raises:
        ValueError: If ``layer_num`` is below 2.  The skip connection needs a
            final pre-activation convolution after the first one, and failing
            here avoids leaving half a block in the graph.
    """
    global conv_num
    if layer_num < 2:
        raise ValueError('layer_num must be at least 2, got %r' % (layer_num,))

    with tf.name_scope('conv' + str(conv_num)):
        weight = tf.Variable(tf.truncated_normal(w_shape, stddev=5e-2), name='weights' + str(conv_num))
        kernel = tf.nn.conv2d(images, weight, [1, 1, 1, 1], padding='SAME')
        bias = tf.Variable(tf.constant(b_value, shape=b_shape), name='bias' + str(conv_num))
        conv = tf.nn.relu(tf.nn.bias_add(kernel, bias), name='conv' + str(conv_num))
        conv_num = conv_num + 1

    for layer in range(layer_num-1):
        with tf.name_scope('conv' + str(conv_num)):
            weight = tf.Variable(tf.truncated_normal(w_shape, stddev=5e-2), name='weights' + str(conv_num))
            kernel = tf.nn.conv2d(conv, weight, [1, 1, 1, 1], padding='SAME')
            bias = tf.Variable(tf.constant(b_value, shape=b_shape), name='bias' + str(conv_num))
            conv_1=tf.nn.bias_add(kernel,bias,name='conv'+str(conv_num))
            # The last convolution stays linear; it is activated only after the
            # skip branch has been added below.
            if layer!=layer_num-2:
                conv=tf.nn.relu(conv_1, name='relu' + str(conv_num))
            conv_num = conv_num + 1

    conv_s = tf.nn.relu(conv_1 + tf.nn.dropout(images, keep_prob))
    return conv_s

# Stage 1: map the input image to the first group of convolution layers.
# (The original note said 64 channels; the filters below produce 32.)
# A 7x7 stride-2 convolution is followed by block_num1 same-shape blocks.
block_num1=3#3
conv=build_conv(image_holder,w_shape=[7,7,1,32],b_value=0.0,k_stride=[1,2,2,1],b_shape=[32])
for i in range(block_num1):
    with tf.name_scope('block1_'+str(i+1)) as scope:
        conv = build_same_block(images=conv, w_shape=[3, 3, 32, 32], b_value=0.0, b_shape=[32], layer_num=2,keep_prob=keep_prob)

#pool=tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
#norm=tf.nn.lrn(pool,4,bias=1.0,alpha=0.001/9.0,beta=0.75)

# Stage 2: map to the second group of convolution layers (64 channels).
# conv1 -> conv2 is the main branch (stride-2 3x3, then stride-1 3x3 without
# ReLU); conv3 is a 1x1 stride-2 convolution of the stage input that is added
# to conv2 before the ReLU.
block_num2=4#
conv1=build_conv(conv,w_shape=[3,3,32,64],k_stride=[1,2,2,1],b_value=0.1,b_shape=[64])
conv2=build_conv_no_relu(conv1,w_shape=[3,3,64,64],k_stride=[1,1,1,1],b_value=0.1,b_shape=[64])
conv3=build_conv_no_relu(conv,w_shape=[1,1,32,64],k_stride=[1,2,2,1],b_value=0.1,b_shape=[64])
conv=tf.nn.relu(conv2+tf.nn.dropout(conv3,keep_prob))
for i in range(block_num2):
    with tf.name_scope('block2_' + str(i + 1)) as scope:
        conv = build_same_block(images=conv, w_shape=[3, 3, 64, 64], b_value=0.1, b_shape=[64], layer_num=2,keep_prob=keep_prob)

# Stage 3: same layout as stage 2, going from 64 to 128 channels.
block_num3=6#5
#conv=build_conv(conv,w_shape=[3,3,64,128],k_stride=[1,2,2,1],b_value=0.1,b_shape=[128])
conv1=build_conv(conv,w_shape=[3,3,64,128],k_stride=[1,2,2,1],b_value=0.1,b_shape=[128])
conv2=build_conv_no_relu(conv1,w_shape=[3,3,128,128],k_stride=[1,1,1,1],b_value=0.1,b_shape=[128])
conv3=build_conv_no_relu(conv,w_shape=[1,1,64,128],k_stride=[1,2,2,1],b_value=0.1,b_shape=[128])
conv=tf.nn.relu(conv2+tf.nn.dropout(conv3,keep_prob))
for i in range(block_num3):
    with tf.name_scope('block3_' + str(i + 1)) as scope:
        conv = build_same_block(images=conv, w_shape=[3, 3, 128, 128], b_value=0.1, b_shape=[128], layer_num=2,keep_prob=keep_prob)

#train_input1=np.array(train_input1).reshape((batch_size,size_max,size_max,1))
#train_input1=np.array(train_input1).reshape((size_max,size_max,1))
# Give every loaded matrix an explicit trailing channel axis: (size_max, size_max, 1).
for reshape_num, matrix in enumerate(train_input):
    train_input[reshape_num] = np.array(matrix).reshape((size_max, size_max, 1))
#print(train_input1.shape)

#print reshape train input
#for x in range(1,96):
#    for i in range(1,96):
#        if train_input1[x][i]!=0:
#            print(train_input1[x][i])

# Average-pool the final feature map with a 3x3 window and stride 1.
pool=tf.nn.avg_pool(conv,ksize=[1,3,3,1],strides=[1,1,1,1],padding='SAME')

# Original note: flatten conv2 into a fully connected layer with num1 nodes.
# The code below flattens `pool` and maps it directly to char_num logits.
#num1=2000

reshape=tf.reshape(pool,[batch_size,-1])
# Number of features per example after flattening.
dim=reshape.get_shape()[1].value
#weight3=variable_with_weight_loss(shape=[dim,char_num],stddev=0.04,w1=0.0)
weight3=tf.Variable(tf.truncated_normal(shape=[dim,char_num],stddev=0.04),name='weights'+str(conv_num))
bias3=tf.Variable(tf.constant(0.1,shape=[char_num]))
logits=tf.matmul(reshape,weight3)+bias3
# Softmax of the logits.
y_conv=tf.nn.softmax(logits)

def loss(logits,labels):
    """Return the total loss: mean cross-entropy plus everything in 'losses'.

    The mean sparse softmax cross-entropy between *logits* and *labels* (cast
    to int64) is added to the 'losses' collection, and the sum of the whole
    collection is returned.

    Args:
        logits: Unnormalised class scores.
        labels: Integer class labels.

    Returns:
        A tensor named 'total_loss' summing every entry of the 'losses' collection.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.cast(labels, tf.int64),
        name='cross_entropy_per_example',
    )
    tf.add_to_collection('losses', tf.reduce_mean(per_example, name='cross_entropy'))
    return tf.add_n(tf.get_collection('losses'), name='total_loss')

# NOTE: this rebinds the name `loss` from the function to the tensor it
# returns, so the function cannot be called again after this line.
loss=loss(logits,label_holder)
train_op=tf.train.AdamOptimizer(1e-3).minimize(loss)
# Whether the true label is the top-1 logit, and the arg-max class of the softmax output.
top_k_op=tf.nn.in_top_k(logits,label_holder,1)
prediction=tf.argmax(y_conv,1)

# Open an interactive session and initialise every variable in the graph.
sess=tf.InteractiveSession()
tf.global_variables_initializer().run()
tf.train.start_queue_runners()

# Load the trained checkpoint over the freshly initialised variables.
saver=tf.train.Saver()
saver.restore(sess,'save7/formal_train7.ckpt')

# Attack
# Everything that uses `model` runs inside its `with` block, and the scratch
# directory is removed in `finally` so a failed run does not leave it behind
# (a leftover directory makes the next run's os.mkdir fail).
try:
    with foolbox.models.TensorFlowModel(image_holder, logits,(-255,0)) as model:
        saver.restore(model.session, "save7/formal_train7.ckpt")
        attack = foolbox.attacks.FGSM(model)
        #attack = foolbox.attacks.PGD(model)
        #attack = foolbox.attacks.CarliniWagnerL2Attack(model)

        # count: correctly classified inputs whose label survives the attack,
        #        i.e. F(x_i) = F(x'_i).
        # count_nonmisclassify: inputs the unattacked model classifies correctly.
        count = 0
        count_nonmisclassify = 0
        for image, true_label in zip(train_input, test_label):
            predict_label = np.argmax(model.predictions(image))
            print(predict_label)
            adversarial = attack(image, true_label,max_epsilon=0.02,epsilons=10)

            if adversarial is None:
                # The attack returns None when it finds no adversarial within
                # max_epsilon; the model's prediction then stands unchanged.
                adversarial_label = predict_label
            else:
                adversarial_label = np.argmax(model.predictions(adversarial))

            #print out the adversarial class from the foolbox model
            print('adversarial class', adversarial_label)
            if predict_label == true_label:
                count_nonmisclassify += 1
                if predict_label == adversarial_label:
                    count += 1

    #calculate the accuracy with adversarial examples
    print(count)
    print(count_nonmisclassify)
    if count_nonmisclassify:
        accuracy = count/count_nonmisclassify*100
    else:
        # No input was classified correctly, so the ratio is undefined.
        accuracy = float('nan')
    print('%.4f'%accuracy,end="")
finally:
    shutil.rmtree('apply_save7_matrix')