{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Training a cNN to detect roadsigns\n",
"In order to process roadsigns in the autonomous car of the Freie Universität, we want to train a convolutional (deep) neural network.\n",
"\n",
"This network is supposed to distinguish between different classes of signs (stop, attention, train crossing etc) and the final model will then be integrated to the autonomos ROS structure.\n",
"\n",
"This notebook shall download the dataset, read it in and then train the classifier. Afterwards, a validation of the training procedure will be done."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from PIL import Image\n",
"import urllib2, cStringIO, zipfile\n",
"import csv\n",
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download the dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"url = 'http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_Images.zip'\n",
"\n",
"if not os.path.exists('GTSRB/Final_Training/Images'):\n",
" try:\n",
" remotezip = urllib2.urlopen(url)\n",
" zipinmemory = cStringIO.StringIO(remotezip.read())\n",
" zip = zipfile.ZipFile(zipinmemory)\n",
" zip.extractall('.')\n",
" except urllib2.HTTPError:\n",
" pass"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read the data in and scale it to a fixed resolution"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def readTrafficSigns(rootpath):\n",
" '''Reads traffic sign data for German Traffic Sign Recognition Benchmark.\n",
"\n",
" Arguments: path to the traffic sign data, for example './GTSRB/Training'\n",
" Returns: list of images, list of corresponding labels'''\n",
" images = [] # images\n",
" labels = [] # corresponding labels\n",
" # loop over all 42 classes\n",
" for c in range(0,43): #43\n",
" prefix = rootpath + '/' + format(c, '05d') + '/' # subdirectory for class\n",
" gtFile = open(prefix + 'GT-'+ format(c, '05d') + '.csv') # annotations file\n",
" gtReader = csv.reader(gtFile, delimiter=';') # csv parser for annotations file\n",
" gtReader.next() # skip header\n",
" # loop over all images in current annotations file\n",
" for row in gtReader:\n",
" img = Image.open(prefix + row[0]) # the 1th column is the filename\n",
" img_resized = img.resize((128, 128), Image.ANTIALIAS)\n",
" images.append(np.array(img_resized.getdata()).reshape(128, 128, 3))\n",
" del img, img_resized\n",
" labels.append(row[7]) # the 8th column is the label\n",
" gtFile.close()\n",
" print \"Loaded images from class \" + str(c)\n",
" return images, labels"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded images from class 0\n",
"Loaded images from class 1\n",
"Loaded images from class 2\n"
]
}
],
"source": [
"trainImg, trainLabels = readTrafficSigns('GTSRB/Final_Training/Images')"
]
},
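{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check of the loaded data, we can display one of the resized images together with its label (a small sketch using the matplotlib import from above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# show the first loaded sign and its class label\n",
"plt.imshow(trainImg[0].astype(np.uint8))\n",
"plt.title('Class label: ' + str(trainLabels[0]))\n",
"plt.axis('off')\n",
"plt.show()"
]
},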
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Print some information on the data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of training images: 4680\n",
"Number of training labels: 4680\n",
"Largest Image Dimensions: (128, 128, 3)\n"
]
}
],
"source": [
"print \"Number of training images: \" + str(len(trainImg))\n",
"print \"Number of training labels: \" + str(len(trainLabels))\n",
"maxShape = (0,0)\n",
"maxPos = 0\n",
"pos = 0\n",
"for img in trainImg:\n",
" if np.prod(img.shape) > np.prod(maxShape):\n",
" maxShape = img.shape\n",
" maxPos = pos\n",
" pos += 1\n",
"print \"Largest Image Dimensions: \" + str(maxShape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Permute the training data randomly (and subset for testing)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"permutation = np.random.permutation(len(trainImg))\n",
"trainImg_per = [trainImg[idx] for idx in permutation]\n",
"trainLabels_per = [trainLabels[idx] for idx in permutation]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_set = np.array(trainImg_per[:100])"
]
},
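{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell above only keeps a small training subset. For the validation announced in the introduction, a held-out portion can be taken from the same permutation (a sketch; the split indices 100:150 are an arbitrary choice, not fixed anywhere in this notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hold out the next 50 permuted images (disjoint from the training subset) for validation\n",
"val_set = np.array(trainImg_per[100:150])\n",
"val_labels_raw = trainLabels_per[100:150]\n",
"print \"Validation set shape: \" + str(val_set.shape)"
]
},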
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make all images the same size\n",
"For a convolutional Neural Network to work, the images all need to have the same size.\n",
"This can be done by padding with black pixels. Then, all of the images are of the size of the largest image.\n",
"\n",
"However, this approach may have drawbacks when it comes to the learning."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#maxShape = (256, 256, 3)\n",
"#train_set = []\n",
"#count = 0\n",
"#for img in trainImg_per[:100]:\n",
"# padded = np.zeros(maxShape)\n",
"# x1 = int(padded.shape[0] / 2. - img.shape[0] / 2.)\n",
"# y1 = int(padded.shape[1] / 2. - img.shape[1] / 2.)\n",
"# padded[x1:img.shape[0]+x1, y1:img.shape[1]+y1] = img\n",
"# train_set.append(padded)\n",
"# if count % 500 == 0:\n",
"# print \"Done with \" + str(count) + \" images\"\n",
"# count += 1\n",
"#train_set = np.array(train_set, dtype=np.float32)\n",
"#print train_set.shape"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"len(train_set)\n",
"for img in train_set:\n",
" if not img.shape == maxShape:\n",
" print \"ERROR\"\n",
" print img.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Transform labels to one-hot-vectors"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0. 0.]\n",
"43\n",
"43\n"
]
}
],
"source": [
"train_labels = []\n",
"number_of_classes = 43\n",
"for label in trainLabels_per[:100]:\n",
" new_label = np.zeros(number_of_classes)\n",
" new_label[int(label)] = 1\n",
" train_labels.append(new_label)\n",
"train_labels = np.array(train_labels, dtype=np.float32)\n",
"print train_labels[9]\n",
"print train_labels[0].shape[0]\n",
"print train_labels.shape[1]"
]
},
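{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same one-hot encoding can also be written as a single vectorized numpy operation, which avoids the Python loop (an equivalent alternative shown for comparison; it does not change the training data):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# index an identity matrix with the integer labels to get one-hot rows\n",
"label_indices = np.array([int(label) for label in trainLabels_per[:100]])\n",
"train_labels_alt = np.eye(number_of_classes, dtype=np.float32)[label_indices]\n",
"print (train_labels_alt == train_labels).all()"
]
},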
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## Build the convolutional Neural Network"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\"\"\"\n",
"This class implements a convolutional neural network classifier.\n",
"Main usage should consist of two steps, namely train and evaluate. During training, the weights of the network\n",
"will be changed in a way to nicely represent the data and classify it in the end.\n",
"\n",
"Parameters:\n",
" img_shape:\n",
" Shape of the images that will be presented to the\n",
" network as (width, height, #channels)\n",
" \n",
" learning_rate:\n",
" The learning rate used for gradient descent\n",
" \n",
" architecture:\n",
" List. It contains for each layer the number\n",
" of neurons. For the convolution, the number\n",
" of neurons corresponds to the number of kernels.\n",
"\"\"\"\n",
"class cNN:\n",
" \n",
" def __init__(self, architecture, img_shape=(28,28), kernel_shape=(5,5), learning_rate=0.001):\n",
" if len(architecture) > 5:\n",
" print \"ERROR. The network is too deep. So far, we can't deal with more than 5 layers!\"\n",
" self.learning_rate = learning_rate\n",
" self.architecture = architecture\n",
" self.kernel_shape = kernel_shape\n",
" \n",
" # some variables which are set by the training function\n",
" self.img_shape = img_shape #(x, y, channels)\n",
" self.n_classes = 10\n",
" \n",
" \n",
" \"\"\"\n",
" This function generates lists of weight matrices and bias matrices from\n",
" some given architecture.\n",
" These can then be used to construct the network.\n",
" \"\"\"\n",
" def generate_weights_and_biases(self):\n",
" weights = []\n",
" biases = []\n",
" for layer in xrange(self.architecture):\n",
" if self.architecture[layer][0] == \"conv\":\n",
" if layer == 0: # first layer\n",
" last_output = img_shape[2]\n",
" else:\n",
" last_output = self.architecture[layer-1][1]\n",
" weights.append(tf.Variable(tf.random_normal([self.kernel_shape[0],\n",
" self.kernel_shape[1],\n",
" img_shape[2],\n",
" self.architecture[layer][1]]\n",
" )))\n",
" biases.append(tf.Variable(tf.random_normal([self.architecture[layer][1]])))\n",
"\n",
" elif self.architecture[layer][0] == \"dense\":\n",
" last_output = self.architecture[layer-1][1]\n",
" num_of_conv_layers_so_far = len([i for i in xrange(layer) if self.architecture[i] == \"conv\"])\n",
" num_of_input_units = (img_shape[0] * img_shape[1]) / num_of_conv_layers_so_far**2\n",
" weights.append(tf.Variable(tf.random_normal([num_of_input_units, self.architecture[layer][1]])))\n",
" biases.append(tf.Variable(tf.random_normal([self.architecture[layer][1]])))\n",
" \n",
" elif self.architecture[layer][0] == \"out\":\n",
" last_output = self.architecture[layer-1][1]\n",
" weights.append(tf.Variable(tf.random_normal([last_output, self.architecture[layer][1]])))\n",
" biases.append(tf.Variable(tf.random_normal([self.architecture[layer][1]])))\n",
" \n",
" def weight_variable(self, shape):\n",
" initial = tf.truncated_normal(shape, stddev=0.1)\n",
" return tf.Variable(initial)\n",
"\n",
" def bias_variable(self, shape):\n",
" initial = tf.constant(0.1, shape=shape)\n",
" return tf.Variable(initial)\n",
"\n",
" def conv2d(self, x, W):\n",
" return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')\n",
"\n",
" def max_pool_2x2(self, x, k=2):\n",
" return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')\n",
"\n",
"\n",
" \n",
" \"\"\"\n",
" This function creates the graph for training and returns a tensorflow function object.\n",
" This object can then be used to train the batches.\n",
" \n",
" Parameters:\n",
" images:\n",
" A batch of images with shape: (n_batch, width, height, n_channels)\n",
" labels:\n",
" A batch of labels encoded as one-hot vector with shape:\n",
" (n_batch, n_classes)\n",
" keep_prob:\n",
" A number that indicates the dropout probability\n",
" \"\"\"\n",
" def construct_model(self, images, labels, keep_prob):\n",
" \n",
" # some network properties\n",
" n_kernels_c1 = 50\n",
" n_kernels_c2 = 64\n",
" n_neurons_d1 = 1024\n",
" pool_factor_1 = 4\n",
" pool_factor_2 = 4\n",
" \n",
" # create variables for layer one\n",
" W_conv1 = self.weight_variable([kernel_shape[0], kernel_shape[1], self.img_shape[2], n_kernels_c1])\n",
" b_conv1 = self.bias_variable([n_kernels_c1])\n",
" \n",
" # do convolution\n",
" # and max pooling for layer one\n",
" h_conv1 = tf.nn.relu(self.conv2d(images, W_conv1) + b_conv1)\n",
" h_pool1 = self.max_pool_2x2(h_conv1, k=pool_factor_1)\n",
"\n",
" # initialize vars for layer two\n",
" W_conv2 = self.weight_variable([kernel_shape[0], kernel_shape[1], n_kernels_c1, n_kernels_c2])\n",
" b_conv2 = self.bias_variable([n_kernels_c2])\n",
"\n",
" # convolve and max pool layer 2\n",
" h_conv2 = tf.nn.relu(self.conv2d(h_pool1, W_conv2) + b_conv2)\n",
" h_pool2 = self.max_pool_2x2(h_conv2, k=pool_factor_2)\n",
" \n",
" # now, do the dense layer 3\n",
" reduced_img_w = self.img_shape[0] / (pool_factor_1*pool_factor_2)\n",
" reduced_img_h = self.img_shape[1] / (pool_factor_1*pool_factor_2)\n",
" \n",
" W_fc1 = self.weight_variable([reduced_img_w * reduced_img_h * n_kernels_c2, n_neurons_d1])\n",
" b_fc1 = self.bias_variable([n_neurons_d1])\n",
" h_pool2_flat = tf.reshape(h_pool2, [-1, reduced_img_w*reduced_img_h*n_kernels_c2])\n",
" h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)\n",
"\n",
" # apply dropout\n",
" h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)\n",
" \n",
" # out_layer\n",
" W_fc2 = self.weight_variable([n_neurons_d1, self.n_classes])\n",
" b_fc2 = self.bias_variable([self.n_classes])\n",
" y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)\n",
"\n",
" # add cross entropy as objective function\n",
" cross_entropy = tf.reduce_mean(-tf.reduce_sum(labels * tf.log(y_conv), reduction_indices=[1]))\n",
" train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(cross_entropy)\n",
" correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(labels, 1))\n",
" accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
"\n",
" return (train_step, cross_entropy, accuracy)\n",
"\n",
" def train_model(self, images, labels):\n",
" \n",
" print images.shape\n",
" assert(images.shape[0] == labels.shape[0])\n",
" \n",
" # set some class variables before constructing the model\n",
" self.n_classes = labels.shape[1]\n",
" self.img_shape = images[0].shape\n",
" train_size = images.shape[0]\n",
" batch_size = 10\n",
" batch_runs = train_size / batch_size\n",
" print \"Batch size: \" + str(batch_size)\n",
" print \"Number of iterations per epoch: \" + str(batch_runs)\n",
"\n",
" # create the graph\n",
" x = tf.placeholder(tf.float32, shape=(batch_size, self.img_shape[0], self.img_shape[1], self.img_shape[2]))\n",
" y = tf.placeholder(tf.float32, shape=(batch_size, self.n_classes))\n",
" keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)\n",
" train_op, ce_op, accuracy_op = self.construct_model(x, y, keep_prob)\n",
" print \"Graph successfully constructed! Start training...\"\n",
" \n",
" self.accuracies = []\n",
" with tf.Session() as sess:\n",
" sess.run(tf.initialize_all_variables())\n",
" for epoch in range(1):\n",
" for batchIdx in range(batch_runs):\n",
" sess.run(train_op, feed_dict={x: images[batchIdx*batch_size:(batchIdx+1)*batch_size],\n",
" y: labels[batchIdx*batch_size:(batchIdx+1)*batch_size],\n",
" keep_prob: 0.5})\n",
" \n",
" if batchIdx % (batch_runs / 10) == 0:\n",
" acc = sess.run(accuracy_op, feed_dict={x: images[batchIdx*batch_size:(batchIdx+1)*batch_size],\n",
" y: labels[batchIdx*batch_size:(batchIdx+1)*batch_size],\n",
" keep_prob: 1.})\n",
" ce = sess.run(ce_op, feed_dict={x: images[batchIdx*batch_size:(batchIdx+1)*batch_size],\n",
" y: labels[batchIdx*batch_size:(batchIdx+1)*batch_size],\n",
" keep_prob: 1.})\n",
" print \"[Batch \" + str(batchIdx) + \"]\\tAccuracy: \" + str(acc) + \"\\tCross Entropy: \" + str(ce)\n",
" self.accuracies.append(acc)\n",
" \n",
" print \"Epoch \" + str(epoch) + \" done!\"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(128, 128, 3)\n",
"(100, 128, 128, 3)\n",
"Batch size: 10\n",
"Number of iterations per epoch: 10\n",
"Graph successfully constructed! Start training...\n",
"[Batch 0]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 1]\tAccuracy: 0.1\tCross Entropy: nan\n",
"[Batch 2]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 3]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 4]\tAccuracy: 0.1\tCross Entropy: nan\n",
"[Batch 5]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 6]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 7]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 8]\tAccuracy: 0.0\tCross Entropy: nan\n",
"[Batch 9]\tAccuracy: 0.1\tCross Entropy: nan\n",
"Epoch 0 done!\n"
]
}
],
"source": [
"architecture = [(\"conv\", 32), (\"conv\", 64), (\"dense\", 1024), (\"out\", 43)]\n",
"img_shape = train_set[0].shape\n",
"print maxShape\n",
"kernel_shape = (5, 5)\n",
"learning_rate = 0.001\n",
"classifier = cNN(architecture, img_shape, kernel_shape, learning_rate)\n",
"classifier.train_model(train_set, train_labels)"
]
},
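{
"cell_type": "markdown",
"metadata": {},
"source": [
"train_model records the logged batch accuracies in self.accuracies, so the training progress can be plotted (a small sketch; it assumes the training cell above has been run):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# plot the accuracy values that were logged during training\n",
"plt.plot(classifier.accuracies)\n",
"plt.xlabel('logged batch')\n",
"plt.ylabel('training accuracy')\n",
"plt.show()"
]
},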
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.0\n",
"0.0\n"
]
}
],
"source": [
"#y = tf.placeholder(tf.float32, shape=(50))\n",
"\n",
"def weight_variable(shape):\n",
" initial = tf.truncated_normal(shape, stddev=0.1)\n",
" return tf.Variable(initial)\n",
"\n",
"def bias_variable(shape):\n",
" initial = tf.constant(0.1, shape=shape)\n",
" return tf.Variable(initial)\n",
"\n",
"# initialize variables\n",
"x = tf.placeholder(tf.float32, shape=(10, 256, 256, 3))\n",
"y = tf.placeholder(tf.float32, shape=(10, 43))\n",
"\n",
"# do convolution and pooling\n",
"W = tf.Variable(tf.truncated_normal([5, 5, 3, 10], stddev=0.1))\n",
"b = tf.Variable(tf.constant(0.1, shape=[10]))\n",
"conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + b\n",
"pool = tf.nn.max_pool(conv, ksize=[1, 8, 8, 1], strides=[1, 8, 8, 1], padding='SAME')\n",
"\n",
"# now, do the dense layer\n",
"W_dense = weight_variable([32 * 32 * 10, 43])\n",
"b_dense = bias_variable([43])\n",
"pool_flat = tf.reshape(pool, [-1, 32*32*10])\n",
"dense_out = tf.nn.relu(tf.matmul(pool_flat, W_dense) + b_dense)\n",
"\n",
"# define loss function\n",
"cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(dense_out, y))\n",
"# define optimizer\n",
"optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(cost)\n",
"# define accuracy\n",
"correct_pred = tf.equal(tf.argmax(dense_out, 1), tf.argmax(y, 1))\n",
"accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))\n",
"\n",
"with tf.Session() as sess:\n",
" sess.run(tf.initialize_all_variables())\n",
" rand_array = np.random.rand(10, 256, 256, 3)\n",
" train = sess.run(optimizer, feed_dict={x: train_set, y: train_labels})\n",
" loss, acc = sess.run([cost, accuracy], feed_dict={x: train_set, y: train_labels})\n",
" print acc\n",
" print loss"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting MNIST_data/train-images-idx3-ubyte.gz\n",
"Extracting MNIST_data/train-labels-idx1-ubyte.gz\n",
"Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n",
"Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n"
]
}
],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"?mnist"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def construct_model_fancy(self, images, labels, keep_prob):\n",
"\n",
" for layer in xrange(len(self.architecture)):\n",
" W_local = self.weights[layer]\n",
" b_local = self.biases[layer]\n",
"\n",
" layer_type = self.architecture[layer][0] # it's a string of either \"conv\", \"dense\" or \"out\"\n",
" if layer_type == \"conv\":\n",
" h_conv1 = tf.nn.relu(self.conv2d(images, W_conv1) + b_conv1)\n",
" h_pool1 = self.max_pool_2x2(h_conv1, k=2)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}