I'm trying to train a CNN for MNIST. Everything goes well except that the loss stays very high in my model, while it is very low in the example (which has a different structure).
This model still yields high accuracy even though the loss is high.
I have attached my code below.
# Graph definition for the MNIST CNN.
# NOTE(review): `dropout_rate` and `n_outputs` must be defined before this block runs.
# Indentation was restored here — the pasted version had the `with` bodies flattened,
# which is a SyntaxError; the scoping below is the most plausible reconstruction
# (confirm against the original notebook).
with tf.name_scope("inputs"):
    # 28x28 single-channel images; batch dimension left open.
    X = inputs = tf.placeholder(tf.float32,
                                shape=[None, 28, 28, 1], name="X")
    # Integer class labels. FIX: the original left `y` unshaped; declaring
    # shape=[None] makes a mis-shaped feed (e.g. a [batch, 1] column) fail
    # loudly instead of silently broadcasting inside the loss.
    y = tf.placeholder(
        tf.int32,
        shape=[None],
        name="y")
    # Scalar flag that switches dropout on only during training steps.
    training = tf.placeholder_with_default(False, [], name="training")

# Two stacked 3x3 SELU convolutions at full resolution.
conv1 = tf.layers.conv2d(
    inputs,
    filters=6,
    kernel_size=3,
    strides=(1, 1),
    padding='SAME',
    activation=tf.nn.selu,
    name="conv1",
)
conv2 = tf.layers.conv2d(
    conv1,
    filters=12,
    kernel_size=3,
    strides=(1, 1),
    padding='SAME',
    activation=tf.nn.selu,
    name="conv2",
)
# Large 8x8 pooling window with stride 2 — unusually big; presumably
# intentional, but worth confirming (a 2x2 window is more common here).
max_pool3 = tf.layers.max_pooling2d(
    conv2,
    pool_size=8,
    strides=2,
    padding='SAME',
    name="max_pool3"
)
with tf.name_scope("conv4"):
    conv4 = tf.layers.conv2d(
        max_pool3,
        filters=12,
        kernel_size=3,
        strides=(1, 1),
        padding='SAME',
        activation=tf.nn.selu,
        name="conv4",
    )
    # Flatten the feature maps; the static shape is fully known after pooling.
    num_ele = int(conv4.shape[1] * conv4.shape[2] * conv4.shape[3])
    conv4_flat = tf.reshape(
        conv4,
        shape=[-1, num_ele],
        name="conv4_flat"
    )
    conv4_flat_dropout = tf.layers.dropout(conv4_flat, rate=dropout_rate,
                                           training=training,
                                           name="conv4_flat_dropout")
with tf.name_scope("fc5"):
    # Fully connected layer with half as many units as flattened features.
    fc5 = tf.layers.dense(
        conv4_flat_dropout,
        conv4_flat_dropout.shape[1] // 2,
        activation=tf.nn.selu,
        name="fc5",
    )
    fc5_dropout = tf.layers.dropout(fc5, rate=dropout_rate,
                                    training=training,
                                    name="fc5_dropout")
    # Raw class scores; the softmax/cross-entropy is expected to be applied
    # by the (not shown) loss op.
    logits = tf.layers.dense(
        fc5_dropout,
        n_outputs,
        name="logits",
    )
And here is the training log:
# It starts with very low accuracy, whereas in the sample the training-set accuracy reaches 1 after the first epoch.
0 train loss:1.8484
train acc:0.751745
validation loss:1.7019
validation acc:0.7656
1 train loss:0.0745
train acc:0.978927
validation loss:0.0782
validation acc:0.9764
2 train loss:0.0958
train acc:0.972818
validation loss:0.1072
validation acc:0.9706
3 train loss:0.1186
train acc:0.971727
validation loss:0.1292
validation acc:0.9714
4 train loss:0.1397
train acc:0.969836
validation loss:0.1422
validation acc:0.9738
# Accuracy always drops dramatically here, for a reason I don't understand.
5 train loss:0.8394
train acc:0.939564
validation loss:0.8237
validation acc:0.9470
6 train loss:0.3108
train acc:0.979182
validation loss:0.3345
validation acc:0.9786
7 train loss:0.6576
train acc:0.967382
validation loss:0.8300
validation acc:0.9652
8 train loss:0.2005
train acc:0.987273
validation loss:0.3021
validation acc:0.9832
9 train loss:0.2915
train acc:0.984145
validation loss:0.4509
validation acc:0.9812
10 train loss:0.7932
train acc:0.968273
validation loss:1.1119
validation acc:0.9634
11 train loss:0.2778
train acc:0.988636
validation loss:0.4988
validation acc:0.9848
12 train loss:0.4892
train acc:0.982982
validation loss:0.6407
validation acc:0.9826
13 train loss:0.5457
train acc:0.983382
validation loss:0.9361
validation acc:0.9806
14 train loss:0.3998
train acc:0.989527
validation loss:0.7423
validation acc:0.9876
15 train loss:0.3925
train acc:0.985745
validation loss:0.7599
validation acc:0.9788
16 train loss:0.2093
train acc:0.993236
validation loss:0.5771
validation acc:0.9850
17 train loss:0.5663
train acc:0.989855
validation loss:1.2298
validation acc:0.9846
18 train loss:0.6623
train acc:0.988927
validation loss:1.3572
validation acc:0.9824
19 train loss:0.1555
train acc:0.994891
validation loss:0.6606
validation acc:0.9872
And here is the sample code:
# Reference CNN graph from the book's sample code.
# NOTE(review): `n_inputs`, `height`, `width`, `channels`, the conv1_*/conv2_*
# hyperparameters, `pool3_fmaps`, `conv2_dropout_rate`, `n_fc1`,
# `fc1_dropout_rate`, and `n_outputs` must be defined before this block runs.
# Indentation restored — the pasted version had the `with` bodies flattened.
with tf.name_scope("inputs"):
    # Flat pixel vectors fed in; reshaped to NHWC for the conv layers.
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    training = tf.placeholder_with_default(False, shape=[], name='training')

conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")

with tf.name_scope("pool3"):
    # 2x2 max pool, stride 2: halves the spatial dimensions to 14x14.
    pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])
    pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=training)

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
    fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)

with tf.name_scope("output"):
    # FIX: the original fed `fc1` here, bypassing `fc1_drop` entirely —
    # the fc1 dropout layer was dead code and never regularized the output layer.
    logits = tf.layers.dense(fc1_drop, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")
I found that most classifications are made with very high probability (close to 1) — both the correct ones and the incorrect ones.
(softmax probabilities)
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
[0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 6.966008e-33 0.000000e+00]
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
The probabilities correspond to each of the images.
My question is: why does my model's loss stay so high (and fluctuate from epoch to epoch) even though accuracy is high, while the sample model's loss is very low?
Original codes are here
https://github.com/Dovermore/handson-ml/blob/master/chapter_13_exer/question_7.ipynb
https://github.com/Dovermore/handson-ml/blob/master/13_convolutional_neural_networks.ipynb
(go to the bottom to find the related segment in the second link)