Importing packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import os, sys, time
print(sys.version_info)
for module in tf, mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)
tensorflow 2.1.0
matplotlib 3.2.0
numpy 1.18.1
pandas 1.0.1
sklearn 0.22.2.post1
tensorflow 2.1.0
tensorflow_core.python.keras.api._v2.keras 2.2.4-tf
Processing data with the tf.data module
dataset = tf.data.Dataset.from_tensor_slices(np.arange(10,dtype=np.float32))
print(dataset)
for _ in dataset:
    print(_)
<TensorSliceDataset shapes: (), types: tf.float32>
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(7.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
tf.Tensor(9.0, shape=(), dtype=float32)
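The batched and interleaved tensors printed below come from further transformations of the same dataset; the code is not shown in the excerpt. A sketch that reproduces the order shown (the cycle_length and block_length values are assumptions):

# repeat the 10 elements 3 times and group them into batches of 7
dataset = dataset.repeat(3).batch(7)
for item in dataset:
    print(item)

# interleave: turn every batch back into a sub-dataset and read the sub-datasets in rotation
dataset2 = dataset.interleave(
    lambda v: tf.data.Dataset.from_tensor_slices(v),
    cycle_length=5,  # how many sub-datasets are read in rotation (assumed value)
    block_length=5,  # how many elements are taken from each before moving on (assumed value)
)
for item in dataset2:
    print(item)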
tf.Tensor([0. 1. 2. 3. 4. 5. 6.], shape=(7,), dtype=float32)
tf.Tensor([7. 8. 9. 0. 1. 2. 3.], shape=(7,), dtype=float32)
tf.Tensor([4. 5. 6. 7. 8. 9. 0.], shape=(7,), dtype=float32)
tf.Tensor([1. 2. 3. 4. 5. 6. 7.], shape=(7,), dtype=float32)
tf.Tensor([8. 9.], shape=(2,), dtype=float32)
# interleave
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(7.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(7.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(7.0, shape=(), dtype=float32)
# TFRecord file format
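The code that builds the protocol buffers printed below is not shown; a sketch, assuming the feature names and values that appear in the output:

# build the individual value lists (their contents are printed first below)
favorite_books = [name.encode('utf-8') for name in ['machine learning', 'cc150']]
favorite_books_bytelist = tf.train.BytesList(value=favorite_books)
hours_floatlist = tf.train.FloatList(value=[15.5, 9.5, 7.0, 8.0])
age_int64list = tf.train.Int64List(value=[42])
print(favorite_books_bytelist, hours_floatlist, age_int64list)

# combine the value lists into a tf.train.Features proto (printed further below)
features = tf.train.Features(
    feature={
        'favorite_book': tf.train.Feature(bytes_list=favorite_books_bytelist),
        'hours': tf.train.Feature(float_list=hours_floatlist),
        'age': tf.train.Feature(int64_list=age_int64list),
    }
)
print(features)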
value: "machine learning"
value: "cc150"
value: 15.5
value: 9.5
value: 7.0
value: 8.0
value: 42
feature {
key: "age"
value {
int64_list {
value: 42
}
}
}
feature {
key: "favorite_book"
value {
bytes_list {
value: "machine learning"
value: "cc150"
}
}
}
feature {
key: "hours"
value {
float_list {
value: 15.5
value: 9.5
value: 7.0
value: 8.0
}
}
}
tf.function
Converts a hand-written Python function into a TF graph function.
Advantage: faster execution.
# tf.function and autograph
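The tensors printed below come from calling a scaled-ELU function, first as a plain Python function and then after converting it with tf.function (and again via the @tf.function decorator). The definition is not shown; a sketch consistent with the autograph output further down (the default scale/alpha values are assumptions):

def scaled_elu(z, scale=1.0, alpha=1.0):
    # z >= 0 ? scale * z : scale * alpha * elu(z)
    is_postive = tf.greater_equal(z, 0.0)
    return scale * tf.where(is_postive, z, alpha * tf.nn.elu(z))

print(scaled_elu(tf.constant(-1.)))
print(scaled_elu(tf.constant([-3., -2.5])))

scaled_elu_tf = tf.function(scaled_elu)  # convert the Python function into a graph function
print(scaled_elu_tf(tf.constant(-1.)))
print(scaled_elu_tf(tf.constant([-3., -2.5])))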
tf.Tensor(-0.63212055, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
tf.Tensor(-0.63212055, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
tf.Tensor(-0.63212055, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
def tf__scaled_elu(z, scale=None, alpha=None):
    do_return = False
    retval_ = ag__.UndefinedReturnValue()
    with ag__.FunctionScope('scaled_elu', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
        is_postive = ag__.converted_call(tf.greater_equal, (z, 0.0), None, fscope)
        do_return = True
        retval_ = fscope.mark_return_value(scale * ag__.converted_call(tf.where, (is_postive, z, alpha * ag__.converted_call(tf.nn.elu, (z,), None, fscope)), None, fscope))
    do_return,
    return ag__.retval(retval_)
var = tf.Variable(0.)
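# a sketch (assumption) of the graph function behind the 21.0 printed below;
# variables used inside a tf.function must be created outside of it
@tf.function
def add_21():
    return var.assign_add(21)

print(add_21())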
tf.Tensor(21.0, shape=(), dtype=float32)
@tf.function()
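# a sketch (assumption) of the decorated function the fragment above belongs to:
def cube(z):
    return tf.pow(z, 3)

print(cube(tf.constant([1., 2., 3.])))  # float32 input
print(cube(tf.constant([1, 2, 3])))     # int32 input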
tf.Tensor([ 1. 8. 27.], shape=(3,), dtype=float32)
tf.Tensor([ 1 8 27], shape=(3,), dtype=int32)
# We add an input signature constraint to the function
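A sketch of how the constraint might be added (the signature matches the error message below); calling the constrained function with a float tensor then raises the ValueError shown:

@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    return tf.pow(z, 3)

try:
    print(cube(tf.constant([1., 2., 3.])))  # float input violates the int32 signature
except ValueError as ex:
    print(ex)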
Python inputs incompatible with input_signature:
inputs: (
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))
input_signature: (
TensorSpec(shape=(None,), dtype=tf.int32, name='x'))
cube_func_int32 = cube.get_concrete_function(tf.TensorSpec([None],tf.int32, name='x'))
print(cube_func_int32)
print(cube_func_int32 is cube.get_concrete_function(tf.TensorSpec([5], tf.int32, name='x')))
print(cube_func_int32 is cube.get_concrete_function(tf.constant([1,2,3])))
print(cube_func_int32.graph)
print(cube_func_int32.graph.get_operations())
<tensorflow.python.eager.function.ConcreteFunction object at 0x0000021D901E6EF0>
True
True
FuncGraph(name=cube, id=2325995326824)
[<tf.Operation 'x' type=Placeholder>, <tf.Operation 'Pow/y' type=Const>, <tf.Operation 'Pow' type=Pow>, <tf.Operation 'Identity' type=Identity>]
pow_op = cube_func_int32.graph.get_operations()[2]
name: "Pow"
op: "Pow"
input: "x"
input: "Pow/y"
attr {
key: "T"
value {
type: DT_INT32
}
}
Pow
[<tf.Tensor 'x:0' shape=(None,) dtype=int32>, <tf.Tensor 'Pow/y:0' shape=() dtype=int32>]
[<tf.Tensor 'Pow:0' shape=(None,) dtype=int32>]
#Approximate derivative
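The code behind these numbers is not shown. A sketch of a central-difference approximation; the concrete functions f and g and the evaluation points are assumptions (f and the point x = 1 are chosen so that f'(1) is roughly 8, matching the first value):

def f(x):
    return 3. * x ** 2 + 2. * x - 1.

def approximate_derivative(f, x, eps=1e-3):
    # central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
    return (f(x + eps) - f(x - eps)) / (2. * eps)

print(approximate_derivative(f, 1.))

def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

def approximate_gradient(g, x1, x2, eps=1e-3):
    # approximate each partial derivative while holding the other variable fixed
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))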
7.999999999994678
(8.999999999993236, 47.999999999994714)
x1 = tf.Variable(2.0)
tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.
# After adding persistent=True we can call gradient twice; the exact results are 9 and 42, compared with which the approximate results above show a noticeable error
tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)
# Both partial derivatives obtained in a single call
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)
print(tape.gradient(z, [x1, x2]))
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
x = tf.Variable(5.)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)
<tf.Tensor: shape=(), dtype=float32, numpy=13.0>
# Simulate gradient descent
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
# Simulate gradient descent (a second variant giving the same result)
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
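The code for the two gradient-descent blocks above is not shown. A sketch of one variant using manual updates (the function, learning rate and step count are assumptions; an equivalent variant would hand the gradient to a keras optimizer via apply_gradients). The minimum of f(x) = 3x^2 + 2x - 1 is at x = -1/3, which matches the final value printed above:

learning_rate = 0.1
x = tf.Variable(0.0)
print(x)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = 3. * x ** 2 + 2. * x - 1.
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)  # x <- x - learning_rate * dz/dx

print(x)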
Preparing the dataset
Fashion MNIST is an image dataset intended as a drop-in replacement for the MNIST handwritten-digit set. It is provided by the research arm of Zalando, a German fashion technology company, and contains front-view pictures of 70,000 products from 10 categories. Its size, format and training/test split are exactly the same as the original MNIST: 60,000 training images and 10,000 test images, each a 28×28 grayscale image, so you can use it to benchmark your machine learning and deep learning algorithms without changing any code.
- 60000 training images and corresponding labels;
- 10000 test images and corresponding labels;
- 10 categories;
- 28×28 resolution per image;
fashion_mnist = keras.datasets.fashion_mnist
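# a sketch (assumption) of the loading and splitting that produces the shapes below
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()
# hold out the first 5000 training images as a validation set
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
print('x_valid.shape, y_valid.shape:', x_valid.shape, y_valid.shape)
print('x_train.shape, y_train.shape', x_train.shape, y_train.shape)
print('x_test.shape, y_test.shape', x_test.shape, y_test.shape)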
x_valid.shape, y_valid.shape: (5000, 28, 28) (5000,)
x_train.shape, y_train.shape (55000, 28, 28) (55000,)
x_test.shape, y_test.shape (10000, 28, 28) (10000,)
Data set sample display
#Data set sample display
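A sketch of a display helper (the function name is an assumption):

def show_single_image(img_arr):
    # display one 28x28 grayscale image
    plt.imshow(img_arr, cmap='binary')
    plt.show()

show_single_image(x_train[0])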
Normalizing the data
Definition: after processing, the data is constrained to a fixed range, usually the interval [0, 1] or [-1, 1].
Common normalization methods:
- Min-max normalization: \frac{x - x_{min}}{x_{max} - x_{min}}
- Z-score standardization: \frac{x - \mu}{\sigma} (where \mu is the mean and \sigma is the standard deviation); a code sketch follows below
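A sketch of z-score standardization with sklearn's StandardScaler; the scaler expects 2-D input, so the images are reshaped to a single column and back (the variable names are the usual ones for this kind of pipeline):

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# fit the scaler on the training set only, then apply the same transform to valid / test
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)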
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2., 1.]))   # keras metrics accumulate state across calls
metric.reset_states             # note: without parentheses this does NOT actually reset the state
print(metric([0.], [1.]))
print(metric.result())
tf.Tensor(12.5, shape=(), dtype=float32)
tf.Tensor(6.75, shape=(), dtype=float32)
tf.Tensor(6.75, shape=(), dtype=float32)
Manual training loop (a sketch of such a loop follows below):
- For each batch: traverse the training set, update the training metric, and use automatic differentiation for the gradient step
- At the end of each epoch: compute the metric on the validation set
epochs = 10
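The loop that produced the log below is not shown. A minimal sketch of such a manual training loop, assuming batch_size = 32 (55000 // 32 = 1718 steps, matching the number printed below), scaled training data x_train_scaled / y_train, a validation set, and an already-built single-output model:

batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
print(steps_per_epoch)

optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()

def random_batch(x, y, batch_size=32):
    # sample a random batch of examples
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx].astype(np.float32)

for epoch in range(epochs):
    metric.reset_states()
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = tf.squeeze(model(x_batch), axis=-1)
            loss = tf.reduce_mean(tf.square(y_pred - y_batch))
        metric(y_batch, y_pred)
        # automatic differentiation + manual parameter update
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # epoch end: evaluate on the validation set
    y_valid_pred = tf.squeeze(model(x_valid_scaled), axis=-1)
    valid_mse = tf.reduce_mean(tf.square(y_valid_pred - y_valid.astype(np.float32)))
    print('Epoch', epoch, 'train mse:', metric.result().numpy(),
          'valid mse:', valid_mse.numpy())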
1718
Epoch 0 train mse: 8.444045 valid mse: 8.318773
Epoch 1 train mse: 8.288684 valid mse: 8.272807
Epoch 2 train mse: 8.279286 valid mse: 8.326979
Epoch 3 train mse: 8.27741 valid mse: 8.239223
Epoch 4 train mse: 8.273457 valid mse: 8.242294
Epoch 5 train mse: 8.217403 valid mse: 8.25629
Epoch 6 train mse: 8.296442 valid mse: 8.23847
Epoch 7 train mse: 8.26509 valid mse: 8.238471
Epoch 8 train mse: 8.295274 valid mse: 8.272746
Epoch 9 train mse: 8.289257 valid mse: 8.2716055
Constructing the model
tf.keras.models.Sequential()
Common model construction:
model = keras.models.Sequential([
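    # a sketch (assumption) of the layers inside the truncated Sequential([...]) above
    keras.layers.Flatten(input_shape=[28, 28]),        # flatten each 28x28 image into a vector
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),       # 10 Fashion MNIST classes
])

model.compile(loss='sparse_categorical_crossentropy',   # labels are integer class ids
              optimizer='sgd',
              metrics=['accuracy'])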
Constructing a model with batch normalization plus an activation function:
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
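# a sketch (assumption) of the remaining layers: Dense -> BatchNormalization -> Activation
for _ in range(20):
    model.add(keras.layers.Dense(100))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dense(10, activation='softmax'))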
Adding dropout layers. Generally we only add dropout to the last few layers:
# construction of a deep neural network model (20 layers)
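A sketch of such a model; the selu activation and AlphaDropout at the end are assumptions consistent with adding dropout only to the last few layers:

model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
for _ in range(20):
    model.add(keras.layers.Dense(100, activation='selu'))   # self-normalizing activation
# AlphaDropout preserves the mean and variance of its inputs, so it pairs well with selu
model.add(keras.layers.AlphaDropout(rate=0.5))
model.add(keras.layers.Dense(10, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])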
Constructing a wide & deep model:
# using the functional API to implement the wide & deep model
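A sketch using the functional API (the 8-feature input shape corresponds to a tabular regression setup and is an assumption):

input = keras.layers.Input(shape=[8])
hidden1 = keras.layers.Dense(30, activation='relu')(input)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
# the wide path is the raw input, the deep path is hidden2; concatenate them
concat = keras.layers.concatenate([input, hidden2])
output = keras.layers.Dense(1)(concat)

model = keras.models.Model(inputs=[input], outputs=[output])
model.compile(loss='mean_squared_error', optimizer='sgd')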
Constructing a multi-input wide & deep model:
#The first five features are used as inputs to the wide model
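A sketch with two inputs; the 5 wide features come from the comment above, while the 6 deep features are an assumption:

input_wide = keras.layers.Input(shape=[5])   # first 5 features feed the wide path
input_deep = keras.layers.Input(shape=[6])   # remaining features feed the deep path (assumed 6)
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output = keras.layers.Dense(1)(concat)

model = keras.models.Model(inputs=[input_wide, input_deep], outputs=[output])
model.compile(loss='mean_squared_error', optimizer='sgd')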
Constructing a wide & deep model with multiple inputs and multiple outputs:
#The first five features are used as inputs to the wide model
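The only change from the previous sketch is a second output taken from the deep path:

input_wide = keras.layers.Input(shape=[5])
input_deep = keras.layers.Input(shape=[6])
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output = keras.layers.Dense(1)(concat)
output2 = keras.layers.Dense(1)(hidden2)     # second output from the deep path only

model = keras.models.Model(inputs=[input_wide, input_deep],
                           outputs=[output, output2])
model.compile(loss='mean_squared_error', optimizer='sgd')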
Hyperparameters:
Neural networks have many parameters that stay fixed during training:
- Network structure parameters: number of layers, width of each layer, activation function of each layer, etc.
- Training parameters: batch_size, learning rate, learning-rate decay algorithm, etc.
Trying them by hand takes a lot of manual effort.
Hyperparameter search strategies:
- Grid search
- Random search
- Genetic algorithm search
- Heuristic search
Wrapping the keras model for sklearn:
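A sketch of wrapping a keras model with the scikit_learn wrapper and running a randomized search over a few hyperparameters; build_model, the parameter ranges and the 2-D tabular data x_train_scaled / y_train are assumptions:

from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

def build_model(hidden_layers=1, layer_size=30, learning_rate=3e-3):
    # build a small regression network from the hyperparameters
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(layer_size, activation='relu',
                                 input_shape=x_train_scaled.shape[1:]))
    for _ in range(hidden_layers - 1):
        model.add(keras.layers.Dense(layer_size, activation='relu'))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate)
    model.compile(loss='mse', optimizer=optimizer)
    return model

sklearn_model = keras.wrappers.scikit_learn.KerasRegressor(build_fn=build_model)

param_distribution = {
    'hidden_layers': [1, 2, 3, 4],
    'layer_size': np.arange(1, 100),
    'learning_rate': reciprocal(1e-4, 1e-2),   # log-uniform distribution
}
random_search_cv = RandomizedSearchCV(sklearn_model, param_distribution,
                                      n_iter=10, cv=3, n_jobs=1)
random_search_cv.fit(x_train_scaled, y_train, epochs=10,
                     validation_data=(x_valid_scaled, y_valid))
print(random_search_cv.best_params_)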
Two ways to customize layers
#Two ways to customize layers
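A sketch of the two approaches: subclassing keras.layers.Layer and wrapping a function in a Lambda layer (names and sizes are assumptions):

# way 1: subclass keras.layers.Layer and implement build / call
class CustomizedDenseLayer(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        self.units = units
        self.activation = keras.layers.Activation(activation)
        super(CustomizedDenseLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # create the layer's weights once the input shape is known
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.units),
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.units,),
                                    initializer='zeros',
                                    trainable=True)
        super(CustomizedDenseLayer, self).build(input_shape)

    def call(self, x):
        return self.activation(x @ self.kernel + self.bias)

# way 2: wrap a simple function with a Lambda layer (softplus as an example)
customized_softplus = keras.layers.Lambda(lambda x: tf.nn.softplus(x))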
#Construction of model
Shape of the 0th sample in the training set: (28, 28)
Hyperparameters
# For the wide & deep model, the training data also needs to be split into the wide and deep inputs
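A sketch of the split; the column counts follow the earlier wide & deep sketch and are assumptions:

# wide input: first 5 columns; deep input: the remaining columns
x_train_scaled_wide = x_train_scaled[:, :5]
x_train_scaled_deep = x_train_scaled[:, 2:]
x_valid_scaled_wide = x_valid_scaled[:, :5]
x_valid_scaled_deep = x_valid_scaled[:, 2:]
x_test_scaled_wide = x_test_scaled[:, :5]
x_test_scaled_deep = x_test_scaled[:, 2:]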
Training the model
Adding callbacks
- Tensorboard:
  - Metrics summary plots
  - Training graph visualization
  - Activation histograms
  - Sampled profiling
- EarlyStopping
  - Focuses on a chosen metric (this relates to the epochs hyperparameter discussed below)
One of the hyperparameters is the number of training epochs: how many times should the full dataset be traversed (one traversal per epoch)? If the number of epochs is too small, the network may underfit (it has not learned enough from the training data); if it is too large, the network may overfit (it fits the "noise" in the training data rather than the signal).
Early stopping aims to remove the need to set the number of epochs by hand. It can also be regarded as a regularization method that helps avoid overfitting (similar to L1/L2 weight decay and dropout).
The underlying reason is that continuing to train can cause test-set accuracy to decline.
The decline in test accuracy from continued training may be caused by (1) overfitting, or (2) a learning rate that is too large, preventing convergence.
Training on the data
logdir = os.path.join('callbacks')
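A sketch of the callbacks and the fit call that produced the log below; the model filename and EarlyStopping settings are assumptions:

if not os.path.exists(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir, 'fashion_mnist_model.h5')

callbacks = [
    keras.callbacks.TensorBoard(logdir),
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3),
]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)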
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
55000/55000 [==============================] - 5s 93us/sample - loss: inf - accuracy: 0.0982 - val_loss: 2.3041 - val_accuracy: 0.0984
Epoch 2/10
55000/55000 [==============================] - 5s 96us/sample - loss: 2.3027 - accuracy: 0.0982 - val_loss: 2.3040 - val_accuracy: 0.0986
Epoch 3/10
55000/55000 [==============================] - 5s 86us/sample - loss: 2.3027 - accuracy: 0.1006 - val_loss: 2.3041 - val_accuracy: 0.0986
Epoch 4/10
55000/55000 [==============================] - 4s 81us/sample - loss: 2.3027 - accuracy: 0.0987 - val_loss: 2.3042 - val_accuracy: 0.0914
Epoch 5/10
55000/55000 [==============================] - 7s 121us/sample - loss: 2.3027 - accuracy: 0.0984 - val_loss: 2.3040 - val_accuracy: 0.0980
Epoch 6/10
55000/55000 [==============================] - 11s 194us/sample - loss: 2.3027 - accuracy: 0.0972 - val_loss: 2.3039 - val_accuracy: 0.0914
Epoch 7/10
55000/55000 [==============================] - 15s 267us/sample - loss: 2.3027 - accuracy: 0.0989 - val_loss: 2.3042 - val_accuracy: 0.0976
Epoch 8/10
55000/55000 [==============================] - 15s 281us/sample - loss: 2.3027 - accuracy: 0.0985 - val_loss: 2.3041 - val_accuracy: 0.0914
Epoch 9/10
55000/55000 [==============================] - 15s 271us/sample - loss: 2.3027 - accuracy: 0.0978 - val_loss: 2.3042 - val_accuracy: 0.0914
Epoch 10/10
55000/55000 [==============================] - 11s 197us/sample - loss: 2.3027 - accuracy: 0.1002 - val_loss: 2.3041 - val_accuracy: 0.0914
{'loss': [inf,
2.3027012555209074,
2.302684170844338,
2.302700057740645,
2.3026898735739967,
2.3026884390050713,
2.3026932554765183,
2.302697267150879,
2.3026952650243584,
2.3026737274516713],
'accuracy': [0.09821818,
0.09821818,
0.10056364,
0.09872727,
0.0984,
0.09716364,
0.09885454,
0.098527275,
0.097763635,
0.10023636],
'val_loss': [2.3040645393371584,
2.3040028648376465,
2.304090444946289,
2.304177123260498,
2.3040085578918457,
2.303938995361328,
2.304162268447876,
2.3041053184509277,
2.304207458496094,
2.304115872955322],
'val_accuracy': [0.0984,
0.0986,
0.0986,
0.0914,
0.098,
0.0914,
0.0976,
0.0914,
0.0914,
0.0914]}
Matplotlib visualization
# Matplotlib visualization
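The helper called below is not defined in the excerpt; a minimal sketch:

def plot_learning_curves(history):
    # plot the loss / accuracy curves collected by model.fit
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 3)
    plt.show()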
plot_learning_curves(history)
model.evaluate(x_test, y_test)
10000/10000 [==============================] - 2s 198us/sample - loss: 2.3026 - accuracy: 0.1000
[2.3026404083251952, 0.1]