Using TensorFlow to build and train a CNN for simple verification code (CAPTCHA) recognition


TensorFlow is the most popular deep learning framework. We can use it to build our own convolutional neural network (CNN) and train our own classifier. This article introduces how to use TensorFlow to build a CNN and how to train a classifier for simple verification code recognition. It assumes that you have already installed TensorFlow and have some basic knowledge of CNNs.

Next, we will introduce, step by step, how to obtain the training data, how to use TensorFlow to build the convolutional neural network, how to train it, and how to test the trained classifier.

1. Prepare training samples

Use the Python library `captcha` to generate the training samples we need. The code is as follows:

import sys 

import os 
import shutil 
import random 
import time 
#Captcha is a library for generating captcha images, which can be installed by PIP install captcha 
from captcha.image import ImageCaptcha 
#Character set used to generate the verification code
CHAR_SET = ['0','1','2','3','4','5','6','7','8','9']
#Length of character set
CHAR_SET_LEN = len(CHAR_SET)
#Length of verification code, each verification code is composed of 4 numbers
CAPTCHA_LEN = 4
#Storage path of verification code picture
CAPTCHA_IMAGE_PATH = 'E:/Tensorflow/captcha/images/'
#The storage path of the verification code image used for model test, in which the verification code image is used as the test set
TEST_IMAGE_PATH = 'E:/Tensorflow/captcha/test/'
#The number of verification code pictures used for model test, which are taken from the generated verification code pictures and put into the test set
#NOTE(review): the value was missing from the listing; 50 is a small hold-out choice - adjust as needed
TEST_IMAGE_NUMBER = 50
#Generate the verification code pictures. With 4 decimal digits there are 10000 verification codes
def generate_captcha_image(charSet = CHAR_SET, charSetLen=CHAR_SET_LEN, captchaImgPath=CAPTCHA_IMAGE_PATH):
  """Write one captcha image for every possible code of CAPTCHA_LEN characters.

  Each image is saved as '<code>.jpg', so the file name doubles as the label.

  Args:
    charSet: sequence of single-character strings to build codes from.
    charSetLen: how many leading entries of charSet to use.
    captchaImgPath: output directory prefix; it is concatenated directly with
      the file name, so it must end with a path separator.
  """
  # Local import keeps this function self-contained; itertools is standard library.
  import itertools
  # Create the output directory up front so that writing the first image cannot
  # fail on a missing folder.
  os.makedirs(captchaImgPath, exist_ok=True)
  total = charSetLen ** CAPTCHA_LEN
  # One generator instance is enough; building it once avoids repeating its
  # setup for every image.
  image = ImageCaptcha()
  # product() yields the codes in the same order as nested loops would, but
  # follows CAPTCHA_LEN instead of a fixed nesting depth of four.
  codes = itertools.product(charSet[:charSetLen], repeat=CAPTCHA_LEN)
  for k, chars in enumerate(codes, 1):
    captcha_text = ''.join(chars)
    image.write(captcha_text, captchaImgPath + captcha_text + '.jpg')
    sys.stdout.write("\rCreating %d/%d" % (k, total))
    # "\r" progress lines stay buffered without an explicit flush
    sys.stdout.flush()
  sys.stdout.write("\n")
#Take a part of the picture set of the verification code as the test set. These pictures do not participate in the training and are only used for the test of the model
def prepare_test_set():
  """Move TEST_IMAGE_NUMBER captcha images from CAPTCHA_IMAGE_PATH to TEST_IMAGE_PATH.

  If fewer images exist than requested, all of them are moved.
  """
  # os.listdir already returns bare file names, so no path splitting is needed.
  fileNameList = os.listdir(CAPTCHA_IMAGE_PATH)
  # Shuffle so the hold-out set is a random sample rather than whatever files
  # the directory listing happens to return first.
  random.shuffle(fileNameList)
  os.makedirs(TEST_IMAGE_PATH, exist_ok=True)
  # Slicing (instead of indexing 0..TEST_IMAGE_NUMBER-1) cannot raise
  # IndexError when the directory holds fewer files.
  for name in fileNameList[:TEST_IMAGE_NUMBER]:
    shutil.move(CAPTCHA_IMAGE_PATH + name, TEST_IMAGE_PATH + name)
if __name__ == '__main__':
  # Step 1: render every possible captcha into CAPTCHA_IMAGE_PATH.
  generate_captcha_image(CHAR_SET, CHAR_SET_LEN, CAPTCHA_IMAGE_PATH)
  # Step 2: hold out the images that the model-testing script later reads
  # from TEST_IMAGE_PATH; without this call that directory stays empty.
  prepare_test_set()

Run the above code to generate the verification code pictures.

The generated verification code picture is shown in the following figure:

2. Build CNN and train classifier

The code is as follows:

import tensorflow as tf 
import numpy as np 
from PIL import Image 
import os 
import random 
import time 
#Storage path of verification code picture
CAPTCHA_IMAGE_PATH = 'E:/Tensorflow/captcha/images/'
#Width of captcha picture
#160x60 is consistent with the 20*8*64 flatten size used by the network:
#three 2x2 'SAME' poolings give ceil(160/8)=20 and ceil(60/8)=8
CAPTCHA_IMAGE_WIDHT = 160
#Height of captcha image
CAPTCHA_IMAGE_HEIGHT = 60
#Number of distinct characters (digits 0-9) and number of characters per captcha
CHAR_SET_LEN = 10
CAPTCHA_LEN = 4
#60% of captcha pictures are put into training set
TRAIN_IMAGE_PERCENT = 0.6
#Training set, file name of captcha picture used for training
TRAINING_IMAGE_NAME = []
#Validation set, file name of the captcha picture used for model validation
VALIDATION_IMAGE_NAME = []

#Directory in which the trained model is stored
MODEL_SAVE_PATH = 'E:/Tensorflow/captcha/models/'
def get_image_file_name(imgPath=CAPTCHA_IMAGE_PATH):
  """Return the file names found in imgPath together with their count.

  Args:
    imgPath: directory that holds the captcha images.

  Returns:
    (fileName, total): the list of file names and its length.
  """
  # os.listdir yields bare names, so they can be collected directly; the
  # previous loop counted the entries but never stored them.
  fileName = os.listdir(imgPath)
  return fileName, len(fileName)
#Convert the verification code to the label vector used in training, dimension is 40
#For example, if the verification code is '0296', the corresponding label is
# [1 0 0 0 0 0 0 0 0 0
# 0 0 1 0 0 0 0 0 0 0
# 0 0 0 0 0 0 0 0 0 1
# 0 0 0 0 0 0 1 0 0 0]
def name2label(name):
  """Return the concatenated one-hot encoding of the digit string `name`.

  The vector has CAPTCHA_LEN * CHAR_SET_LEN entries; character i of `name`
  sets a single 1 inside the i-th group of CHAR_SET_LEN entries.
  """
  one_hot = np.zeros(CAPTCHA_LEN * CHAR_SET_LEN)
  zero_code = ord('0')
  for position, char in enumerate(name):
    # offset of this character's group + the digit's value inside the group
    one_hot[position * CHAR_SET_LEN + ord(char) - zero_code] = 1
  return one_hot
#Get the data of the captcha image and its label
def get_data_and_label(fileName, filePath=CAPTCHA_IMAGE_PATH):
  """Load one captcha image and derive its label from the file name.

  Args:
    fileName: image file name; its first CAPTCHA_LEN characters are the code.
    filePath: directory that contains the image.

  Returns:
    (image_data, image_label): the flattened grayscale pixels scaled by 1/255,
    and the one-hot label vector produced by name2label.
  """
  pathName = os.path.join(filePath, fileName)
  # The `with` block closes the file handle once the pixels have been copied
  # into a numpy array.
  with Image.open(pathName) as img:
    #Convert to grayscale
    image_array = np.array(img.convert("L"))
  image_data = image_array.flatten()/255
  image_label = name2label(fileName[0:CAPTCHA_LEN])
  return image_data, image_label
#Generate a training batch
def get_next_batch(batchSize=32, trainOrTest='train', step=0):
  """Assemble one batch of image data and labels.

  Args:
    batchSize: number of samples in the batch.
    trainOrTest: 'validate' reads from VALIDATION_IMAGE_NAME; any other value
      reads from TRAINING_IMAGE_NAME.
    step: batch counter; the batch starts at index step*batchSize and wraps
      around the end of the file list.

  Returns:
    (batch_data, batch_label) with shapes
    [batchSize, CAPTCHA_IMAGE_WIDHT*CAPTCHA_IMAGE_HEIGHT] and
    [batchSize, CAPTCHA_LEN*CHAR_SET_LEN].

  Raises:
    ValueError: if the selected file list is empty.
  """
  batch_data = np.zeros([batchSize, CAPTCHA_IMAGE_WIDHT*CAPTCHA_IMAGE_HEIGHT])
  batch_label = np.zeros([batchSize, CAPTCHA_LEN * CHAR_SET_LEN])
  if trainOrTest == 'validate':
    fileNameList = VALIDATION_IMAGE_NAME
  else:
    fileNameList = TRAINING_IMAGE_NAME
  totalNumber = len(fileNameList)
  if totalNumber == 0:
    # Fail with a clear message instead of a modulo-by-zero below.
    raise ValueError("no image files available for '%s' batches" % trainOrTest)
  indexStart = step*batchSize
  for i in range(batchSize):
    # The modulo wraps around so any step value maps to a valid index.
    index = (i + indexStart) % totalNumber
    img_data, img_label = get_data_and_label(fileNameList[index])
    batch_data[i, : ] = img_data
    batch_label[i, : ] = img_label
  return batch_data, batch_label
#Building convolution neural network and training
def train_data_with_CNN():
  """Build the CNN, train it, and save the first model that validates well.

  Network: three blocks of 5x5 convolution + ReLU + 2x2 max pooling + dropout,
  one 1024-unit fully connected layer, and a linear output of
  CAPTCHA_LEN * CHAR_SET_LEN logits. The loss is sigmoid cross entropy,
  minimised with Adam (learning rate 0.001).

  Training runs for at most 6000 steps with batches of 64 and keep_prob 0.75.
  Every 100 steps the per-character accuracy is measured on 100 validation
  images; once it exceeds 0.99 the model is saved under MODEL_SAVE_PATH and
  training stops.
  """
  #Initialization weight
  def weight_variable(shape, name='weight'):
    init = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_value=init, name=name)
  #Initialize offset
  def bias_variable(shape, name='bias'):
    init = tf.constant(0.1, shape=shape)
    return tf.Variable(init, name=name)
  def conv2d(x, W, name='conv2d'):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME', name=name)
  def max_pool_2X2(x, name='maxpool'):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name=name)
  #Input layer
  #Please note the name of X, which will be used when testing the model
  X = tf.placeholder(tf.float32, [None, CAPTCHA_IMAGE_WIDHT * CAPTCHA_IMAGE_HEIGHT], name='data-input')
  Y = tf.placeholder(tf.float32, [None, CAPTCHA_LEN * CHAR_SET_LEN], name='label-input')
  x_input = tf.reshape(X, [-1, CAPTCHA_IMAGE_HEIGHT, CAPTCHA_IMAGE_WIDHT, 1], name='x-input')
  #Dropout to prevent over fitting
  #Please note the name of keep_prob, which will be used when testing the model
  keep_prob = tf.placeholder(tf.float32, name='keep-prob')
  #First convolution
  W_conv1 = weight_variable([5,5,1,32], 'W_conv1')
  B_conv1 = bias_variable([32], 'B_conv1')
  conv1 = tf.nn.relu(conv2d(x_input, W_conv1, 'conv1') + B_conv1)
  conv1 = max_pool_2X2(conv1, 'conv1-pool')
  conv1 = tf.nn.dropout(conv1, keep_prob)
  #Second convolution
  W_conv2 = weight_variable([5,5,32,64], 'W_conv2')
  B_conv2 = bias_variable([64], 'B_conv2')
  conv2 = tf.nn.relu(conv2d(conv1, W_conv2,'conv2') + B_conv2)
  conv2 = max_pool_2X2(conv2, 'conv2-pool')
  conv2 = tf.nn.dropout(conv2, keep_prob)
  #Third convolution
  W_conv3 = weight_variable([5,5,64,64], 'W_conv3')
  B_conv3 = bias_variable([64], 'B_conv3')
  conv3 = tf.nn.relu(conv2d(conv2, W_conv3, 'conv3') + B_conv3)
  conv3 = max_pool_2X2(conv3, 'conv3-pool')
  conv3 = tf.nn.dropout(conv3, keep_prob)
  #Fully connected layer
  #Each pooling halves width and height (rounding up), so after three poolings
  #both are reduced by a factor of 8. Reading the size from conv3's static
  #shape gives 20*8*64 for 160x60 images and stays correct for other sizes.
  flat_dim = int(np.prod(conv3.get_shape().as_list()[1:]))
  W_fc1 = weight_variable([flat_dim, 1024], 'W_fc1')
  B_fc1 = bias_variable([1024], 'B_fc1')
  fc1 = tf.reshape(conv3, [-1, flat_dim])
  fc1 = tf.nn.relu(tf.add(tf.matmul(fc1, W_fc1), B_fc1))
  fc1 = tf.nn.dropout(fc1, keep_prob)
  #Output layer
  W_fc2 = weight_variable([1024, CAPTCHA_LEN * CHAR_SET_LEN], 'W_fc2')
  B_fc2 = bias_variable([CAPTCHA_LEN * CHAR_SET_LEN], 'B_fc2')
  output = tf.add(tf.matmul(fc1, W_fc2), B_fc2, 'output')
  loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=output))
  optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)
  predict = tf.reshape(output, [-1, CAPTCHA_LEN, CHAR_SET_LEN], name='predict')
  labels = tf.reshape(Y, [-1, CAPTCHA_LEN, CHAR_SET_LEN], name='labels')
  #Prediction results
  #Please note the name of predict_max_idx, which will be used when testing the model
  predict_max_idx = tf.argmax(predict, axis=2, name='predict_max_idx')
  labels_max_idx = tf.argmax(labels, axis=2, name='labels_max_idx')
  predict_correct_vec = tf.equal(predict_max_idx, labels_max_idx)
  #Mean over every character position, i.e. per-character accuracy
  accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32))
  saver = tf.train.Saver()
  #Make sure the checkpoint directory exists before the first save
  os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
  with tf.Session() as sess:
    #Variables must be initialised before the first training step
    sess.run(tf.global_variables_initializer())
    for steps in range(6000):
      train_data, train_label = get_next_batch(64, 'train', steps)
      sess.run(optimizer, feed_dict={X : train_data, Y : train_label, keep_prob:0.75})
      if steps % 100 == 0:
        test_data, test_label = get_next_batch(100, 'validate', steps)
        #Dropout is disabled (keep_prob 1.0) while measuring accuracy
        acc = sess.run(accuracy, feed_dict={X : test_data, Y : test_label, keep_prob:1.0})
        print("steps=%d, accuracy=%f" % (steps, acc))
        if acc > 0.99:
          saver.save(sess, MODEL_SAVE_PATH+"crack_captcha.model", global_step=steps)
          #Stop at the first checkpoint that clears the threshold
          break
if __name__ == '__main__':
  image_filename_list, total = get_image_file_name(CAPTCHA_IMAGE_PATH)
  #Shuffle the file names so the training/validation split is random
  random.shuffle(image_filename_list)
  trainImageNumber = int(total * TRAIN_IMAGE_PERCENT)
  #Split into the training set
  TRAINING_IMAGE_NAME = image_filename_list[ : trainImageNumber]
  #And the validation set
  VALIDATION_IMAGE_NAME = image_filename_list[trainImageNumber : ]
  #Run the training itself before reporting that it finished
  train_data_with_CNN()
  print('Training finished')

Run the above code to start training. Training will take some time; without a GPU it will be slower.

After training, the output is as follows. After 4100 iterations, the accuracy of the trained classifier model on the validation set is 99.5%.

The generated model files are as follows; they will be used in model testing.

3. Test model

Write code to test the trained model:

import tensorflow as tf 

import numpy as np 
from PIL import Image 
import os 
import matplotlib.pyplot as plt  
#Number of characters in each verification code; the first CAPTCHA_LEN
#characters of a test image's file name are its true value
CAPTCHA_LEN = 4
#Directory that holds the trained model files
MODEL_SAVE_PATH = 'E:/Tensorflow/captcha/models/'
#Directory that holds the test images
TEST_IMAGE_PATH = 'E:/Tensorflow/captcha/test/'
def get_image_data_and_name(fileName, filePath=TEST_IMAGE_PATH):
  """Load one test image and read its true code from the file name.

  Args:
    fileName: image file name; its first CAPTCHA_LEN characters are the code.
    filePath: directory that contains the image.

  Returns:
    (image_data, image_name): the flattened grayscale pixels scaled by 1/255,
    and the code string taken from the file name.
  """
  pathName = os.path.join(filePath, fileName)
  # The `with` block closes the file handle once the pixels have been copied
  # into a numpy array.
  with Image.open(pathName) as img:
    #Convert to grayscale
    image_array = np.array(img.convert("L"))
  image_data = image_array.flatten()/255
  image_name = fileName[0:CAPTCHA_LEN]
  return image_data, image_name
def digitalStr2Array(digitalStr):
  """Return a numpy array holding the numeric value of each digit character."""
  zero_code = ord('0')
  return np.array([ord(ch) - zero_code for ch in digitalStr])
def model_test(show_images=True):
  """Run the trained model over every image in TEST_IMAGE_PATH and report accuracy.

  Args:
    show_images: when True, display each test image with matplotlib before
      printing its result.

  Raises:
    IOError: if no checkpoint is found in MODEL_SAVE_PATH.
  """
  # os.listdir already returns bare file names.
  nameList = os.listdir(TEST_IMAGE_PATH)
  totalNumber = len(nameList)
  if totalNumber == 0:
    # Nothing to evaluate; also avoids dividing by zero in the summary line.
    print('No test images found in ' + TEST_IMAGE_PATH)
    return
  # Use the most recent checkpoint for both the graph and the weights, so the
  # step number in the file name does not have to be hard-coded.
  checkpoint = tf.train.latest_checkpoint(MODEL_SAVE_PATH)
  if checkpoint is None:
    raise IOError('No trained model checkpoint found in ' + MODEL_SAVE_PATH)
  #Load graph
  saver = tf.train.import_meta_graph(checkpoint + ".meta")
  graph = tf.get_default_graph()
  #Get the tensors from the graph. Their names are defined when building the graph (see the code in step 2 above)
  input_holder = graph.get_tensor_by_name("data-input:0")
  keep_prob_holder = graph.get_tensor_by_name("keep-prob:0")
  predict_max_idx = graph.get_tensor_by_name("predict_max_idx:0")
  with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    count = 0
    for fileName in nameList:
      img_data, img_name = get_image_data_and_name(fileName, TEST_IMAGE_PATH)
      # keep_prob 1.0 disables dropout at inference time
      predict = sess.run(predict_max_idx, feed_dict={input_holder:[img_data], keep_prob_holder : 1.0})
      if show_images:
        filePathName = TEST_IMAGE_PATH + fileName
        img = Image.open(filePathName)
        plt.imshow(img)
        plt.axis('off')
        plt.show()
      predictValue = np.squeeze(predict)
      rightValue = digitalStr2Array(img_name)
      if np.array_equal(predictValue, rightValue):
        result = 'correct'
        count += 1
      else:
        result = 'error'
      print('actual value: {}, predicted value: {}, test result: {}'.format(rightValue, predictValue, result))
    print('accuracy: %.2f%% (%d/%d)' % (count * 100.0 / totalNumber, count, totalNumber))
if __name__ == '__main__':
  # Evaluate the trained model on the held-out test images.
  model_test()

The test results of the model are as follows, and the accuracy of recognition on the test set is 94%

Here are two verification codes that were recognized incorrectly:

That is all the editor has to share in this article on using TensorFlow to build and train your own CNN for simple verification code recognition. I hope it can serve as a reference for you, and I hope you will continue to support Developpaer.