TF: tips 3 - CNN concepts

2017, May 02
Convolutional Neural Networks (CNNs) are hugely popular for tasks such as image classification and object recognition. They are known to outperform ordinary deep neural networks (DNNs) on image data, and the secret lies in how the input is handled. In a DNN, every value in the input layer is connected to every neuron in the next layer; in a CNN, a filter passes only adjacent input values (or neurons) to a neuron in the next layer. And whereas a DNN learns a separate weight for every connection, a single CNN filter reuses the same weights everywhere, so it can detect a given pattern regardless of where it appears in the image. Thanks to this structure, a CNN picks up meaningful features automatically.
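As a minimal illustration of the weight-sharing idea, the sketch below slides a single 3x3 filter over a toy 5x5 input with plain NumPy (the array values are made up for the example); every output position is computed from its local patch using the same nine weights.

import numpy as np

image = np.arange(25, dtype=np.float32).reshape(5, 5)      # toy 5x5 "image"
kernel = np.array([[1., 0., -1.],
                   [1., 0., -1.],
                   [1., 0., -1.]])                          # one shared 3x3 filter

out = np.zeros((3, 3), dtype=np.float32)                    # 'VALID' convolution output
for i in range(3):
    for j in range(3):
        patch = image[i:i+3, j:j+3]                         # local receptive field
        out[i, j] = np.sum(patch * kernel)                  # same weights at every location
print(out)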

The most basic form of a CNN extracts features from low level to high level with convolution layers and pooling layers, then passes them through fully connected layers to perform the classification. Let's run the MNIST dataset through a CNN.
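Before the code, it helps to track how the tensor shapes evolve through the network (5x5 filters with 'SAME' padding and 2x2 max-pooling, matching the code below):

input           : 28 x 28 x  1
conv1 (5x5, 32) : 28 x 28 x 32
pool1 (2x2)     : 14 x 14 x 32
conv2 (5x5, 64) : 14 x 14 x 64
pool2 (2x2)     :  7 x  7 x 64   -> flattened to 7*7*64 = 3136 for fc1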

import matplotlib.pyplot as plt
from collections import namedtuple
import pandas as pd
import tensorflow as tf
%matplotlib inline

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
def build_cnn(learning_rate):
    
    ## define input
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    ## weight and bias variables
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
    
    ## define conv and max_pool
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

    ## define operation
    ### conv1: depth 32
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])

    x_image = tf.reshape(x, [-1, 28, 28, 1])
    conv1 = conv2d(x_image, W_conv1) + b_conv1
    h_conv1 = tf.nn.relu(conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    layer1 = h_pool1
    
    ### conv2: depth 64
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])

    conv2 = conv2d(layer1, W_conv2) + b_conv2
    h_conv2 = tf.nn.relu(conv2)
    h_pool2 = max_pool_2x2(h_conv2)
    layer2 = h_pool2
    
    ### fc1: 1024
    W_fc1 = weight_variable([7*7*64, 1024])
    b_fc1 = bias_variable([1024])
    layer2_matrix = tf.reshape(layer2, [-1, 7*7*64])
    matmul_fc1 = tf.matmul(layer2_matrix, W_fc1) + b_fc1
    h_fc1 = tf.nn.relu(matmul_fc1)
    layer3 = h_fc1

    ### dropout on fc1
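    ### keep_prob is a placeholder so dropout can be turned on (0.5) for training and off (1.0) for evaluation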
    keep_prob = tf.placeholder(tf.float32)
    layer3_drop = tf.nn.dropout(layer3, keep_prob)

    ### fc2: 10
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    matmul_fc2 = tf.matmul(layer3_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(matmul_fc2)
    layer4 = y_conv
    
    ### cross_entropy and train_step
    ### softmax_cross_entropy_with_logits expects raw logits (pre-softmax),
    ### so use matmul_fc2 here rather than the already-softmaxed layer4
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=matmul_fc2)
    cost = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
    ### prediction and accuracy
    correct_prediction = tf.equal(tf.argmax(layer4, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    ### graph nodes
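    ### bundle the tensors and ops the training loop needs into a namedtuple, looked up by name from locals()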
    export_nodes = ['x', 'y_', 'keep_prob', 'cost', 'correct_prediction', 'accuracy', 'train_step']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph
## hyperparams
batch_size = 100
epochs = 2000          # number of mini-batch training steps (not full passes over the data)
learning_rate = 1e-4
keep_prob = 0.5
## run operation
model = build_cnn(learning_rate)
cnn_train_cost = []
cnn_val_acc = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for e in range(epochs):
        batch = mnist.train.next_batch(batch_size)
        
        if (e % 100 == 0) | (e == epochs-1):
            train_acc = sess.run(model.accuracy, feed_dict={
                model.x: batch[0], model.y_: batch[1], model.keep_prob: 1.0
            })
            print("step %d, training_accuracy %g" %(e, train_acc))
            
            val_acc = sess.run(model.accuracy, feed_dict={
                model.x: mnist.test.images, model.y_: mnist.test.labels, model.keep_prob: 1.0
            })
            print("test_accuracy %g" % val_acc)
            cnn_val_acc.append({'e': e, 'val_acc': val_acc})
        _, t_cost = sess.run([model.train_step, model.cost], feed_dict={model.x: batch[0], model.y_: batch[1], model.keep_prob: keep_prob})
        cnn_train_cost.append({'e': e, 'cost': t_cost})
    
step 0, training_accuracy 0.13
test_accuracy 0.1251
step 100, training_accuracy 0.77
test_accuracy 0.7613
step 200, training_accuracy 0.92
test_accuracy 0.9033
step 300, training_accuracy 0.93
test_accuracy 0.9255
step 400, training_accuracy 0.94
test_accuracy 0.9392
step 500, training_accuracy 0.91
test_accuracy 0.9468
step 600, training_accuracy 0.94
test_accuracy 0.9503
step 700, training_accuracy 0.99
test_accuracy 0.9532
step 800, training_accuracy 0.98
test_accuracy 0.9552
step 900, training_accuracy 0.93
test_accuracy 0.9622
step 1000, training_accuracy 0.96
test_accuracy 0.9634
step 1100, training_accuracy 0.98
test_accuracy 0.9664
step 1200, training_accuracy 0.95
test_accuracy 0.9651
step 1300, training_accuracy 0.97
test_accuracy 0.9693
step 1400, training_accuracy 0.96
test_accuracy 0.9717
step 1500, training_accuracy 0.99
test_accuracy 0.9703
step 1600, training_accuracy 0.94
test_accuracy 0.9734
step 1700, training_accuracy 0.98
test_accuracy 0.9722
step 1800, training_accuracy 0.95
test_accuracy 0.9738
step 1900, training_accuracy 0.97
test_accuracy 0.9758
step 1999, training_accuracy 0.98
test_accuracy 0.9771
cnn_train_df = pd.DataFrame.from_dict(cnn_train_cost)
cnn_train_df.set_index('e', inplace=True)

cnn_val_df = pd.DataFrame.from_dict(cnn_val_acc)
cnn_val_df.set_index('e', inplace=True)

plt.plot(cnn_train_df, label="train_cost")
plt.plot(cnn_val_df, label="val_acc")
plt.legend()
plt.show()

(plot: CNN training cost and validation accuracy over training steps)

For comparison, we build a simple DNN.

We take the fully connected layers that sit after the CNN's convolution layers, make them a standalone DNN, and train it with the same hyperparameters.

def build_dnn(learning_rate):
    
    ## define input
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    
    ## weight and bias variables (redefined so build_dnn is self-contained)
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    ### fc1: 1024
    W_fc1 = weight_variable([784, 1024])
    b_fc1 = bias_variable([1024])
    matmul_fc1 = tf.matmul(x, W_fc1) + b_fc1       # x is already flat: [None, 784]
    h_fc1 = tf.nn.relu(matmul_fc1)
    layer3 = h_fc1

    ### dropout on fc1
    keep_prob = tf.placeholder(tf.float32)
    layer3_drop = tf.nn.dropout(layer3, keep_prob)

    ### fc2: 10
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    matmul_fc2 = tf.matmul(layer3_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(matmul_fc2)
    layer4 = y_conv
    
    ### cross_entropy and train_step
    ### use the raw logits (matmul_fc2) here as well, not the softmax output
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=matmul_fc2)
    cost = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
    ### prediction and accuracy
    correct_prediction = tf.equal(tf.argmax(layer4, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    ### graph nodes
    export_nodes = ['x', 'y_', 'keep_prob', 'cost', 'correct_prediction', 'accuracy', 'train_step']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph
## run operation
model = build_dnn(learning_rate)
dnn_train_cost = []
dnn_val_acc = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for e in range(epochs):
        batch = mnist.train.next_batch(batch_size)
        
        if (e % 100 == 0) | (e == epochs-1):
            train_acc = sess.run(model.accuracy, feed_dict={
                model.x: batch[0], model.y_: batch[1], model.keep_prob: 1.0
            })
            print("step %d, training_accuracy %g" %(e, train_acc))
            
            val_acc = sess.run(model.accuracy, feed_dict={
                model.x: mnist.test.images, model.y_: mnist.test.labels, model.keep_prob: 1.0
            })
            print("test_accuracy %g" % val_acc)
            dnn_val_acc.append({'e': e, 'val_acc': val_acc})
        _, t_cost = sess.run([model.train_step, model.cost], feed_dict={model.x: batch[0], model.y_: batch[1], model.keep_prob: keep_prob})
        dnn_train_cost.append({'e': e, 'cost': t_cost})
    
step 0, training_accuracy 0.11
test_accuracy 0.1089
step 100, training_accuracy 0.59
test_accuracy 0.6115
step 200, training_accuracy 0.62
test_accuracy 0.7274
step 300, training_accuracy 0.84
test_accuracy 0.7722
step 400, training_accuracy 0.84
test_accuracy 0.8707
step 500, training_accuracy 0.93
test_accuracy 0.9026
step 600, training_accuracy 0.92
test_accuracy 0.9079
step 700, training_accuracy 0.87
test_accuracy 0.9121
step 800, training_accuracy 0.93
test_accuracy 0.9138
step 900, training_accuracy 0.9
test_accuracy 0.9208
step 1000, training_accuracy 0.99
test_accuracy 0.9218
step 1100, training_accuracy 0.93
test_accuracy 0.924
step 1200, training_accuracy 0.91
test_accuracy 0.929
step 1300, training_accuracy 0.96
test_accuracy 0.9303
step 1400, training_accuracy 0.96
test_accuracy 0.9297
step 1500, training_accuracy 0.95
test_accuracy 0.9317
step 1600, training_accuracy 0.93
test_accuracy 0.9331
step 1700, training_accuracy 0.93
test_accuracy 0.9342
step 1800, training_accuracy 0.96
test_accuracy 0.9373
step 1900, training_accuracy 0.91
test_accuracy 0.9379
step 1999, training_accuracy 0.95
test_accuracy 0.9388
dnn_train_df = pd.DataFrame.from_dict(dnn_train_cost)
dnn_train_df.set_index('e', inplace=True)

dnn_val_df = pd.DataFrame.from_dict(dnn_val_acc)
dnn_val_df.set_index('e', inplace=True)

plt.plot(dnn_train_df, label="train_cost")
plt.plot(dnn_val_df, label="val_acc")
plt.legend()
plt.show()

(plot: DNN training cost and validation accuracy over training steps)

The DNN doesn't quite reach the CNN's level, but it still performs reasonably well. To compare the two models, let's plot train_cost and val_acc.

plt.plot(cnn_train_df, label="cnn_train_cost", alpha=0.5)
plt.plot(dnn_train_df, label="dnn_train_cost", alpha=0.5)
plt.legend()
plt.show()

(plot: CNN vs DNN training cost)

plt.plot(cnn_val_df, label="cnn_val_acc", alpha=0.5)
plt.plot(dnn_val_df, label="dnn_val_acc", alpha=0.5)
plt.legend()
plt.show()

(plot: CNN vs DNN validation accuracy)

The CNN's train_cost drops faster than the DNN's, and its validation accuracy also rises more quickly. As training proceeds, the validation accuracy of both models converges, but the CNN converges at a slightly higher point than the DNN.