TF — Basic Learning – Data Processing – model construction, etc

Time:2022-5-13

Import of packages

import matplotlib as mpl

import matplotlib.pyplot as plt

import numpy as np

import sklearn

import pandas as pd

import tensorflow as tf

from tensorflow import keras

import os, sys, time 

print(sys.version_info)

for module in tf, mpl, np, pd, sklearn, tf, keras:

    print(module.__name__, module.__version__)
sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)

tensorflow 2.1.0

matplotlib 3.2.0

numpy 1.18.1

pandas 1.0.1

sklearn 0.22.2.post1

tensorflow 2.1.0

tensorflow_core.python.keras.api._v2.keras 2.2.4-tf

TF data module processing data

dataset = tf.data.Dataset.from_tensor_slices(np.arange(10,dtype=np.float32))

print(dataset)

for _ in dataset:

    print(_)
<TensorSliceDataset shapes: (), types: tf.float32>

tf.Tensor(0.0, shape=(), dtype=float32)

tf.Tensor(1.0, shape=(), dtype=float32)

tf.Tensor(2.0, shape=(), dtype=float32)

tf.Tensor(3.0, shape=(), dtype=float32)

tf.Tensor(4.0, shape=(), dtype=float32)

tf.Tensor(5.0, shape=(), dtype=float32)

tf.Tensor(6.0, shape=(), dtype=float32)

tf.Tensor(7.0, shape=(), dtype=float32)

tf.Tensor(8.0, shape=(), dtype=float32)

tf.Tensor(9.0, shape=(), dtype=float32)
'''
tf.Tensor([0. 1. 2. 3. 4. 5. 6.], shape=(7,), dtype=float32)

tf.Tensor([7. 8. 9. 0. 1. 2. 3.], shape=(7,), dtype=float32)

tf.Tensor([4. 5. 6. 7. 8. 9. 0.], shape=(7,), dtype=float32)

tf.Tensor([1. 2. 3. 4. 5. 6. 7.], shape=(7,), dtype=float32)

tf.Tensor([8. 9.], shape=(2,), dtype=float32)
# interleave
tf.Tensor(0.0, shape=(), dtype=float32)

tf.Tensor(1.0, shape=(), dtype=float32)

tf.Tensor(2.0, shape=(), dtype=float32)

tf.Tensor(3.0, shape=(), dtype=float32)

tf.Tensor(4.0, shape=(), dtype=float32)

tf.Tensor(7.0, shape=(), dtype=float32)

tf.Tensor(8.0, shape=(), dtype=float32)

tf.Tensor(9.0, shape=(), dtype=float32)

tf.Tensor(0.0, shape=(), dtype=float32)

tf.Tensor(1.0, shape=(), dtype=float32)

tf.Tensor(4.0, shape=(), dtype=float32)

tf.Tensor(5.0, shape=(), dtype=float32)

tf.Tensor(6.0, shape=(), dtype=float32)

tf.Tensor(7.0, shape=(), dtype=float32)

tf.Tensor(8.0, shape=(), dtype=float32)

tf.Tensor(1.0, shape=(), dtype=float32)

tf.Tensor(2.0, shape=(), dtype=float32)

tf.Tensor(3.0, shape=(), dtype=float32)

tf.Tensor(4.0, shape=(), dtype=float32)

tf.Tensor(5.0, shape=(), dtype=float32)

tf.Tensor(8.0, shape=(), dtype=float32)

tf.Tensor(9.0, shape=(), dtype=float32)

tf.Tensor(5.0, shape=(), dtype=float32)

tf.Tensor(6.0, shape=(), dtype=float32)

tf.Tensor(2.0, shape=(), dtype=float32)

tf.Tensor(3.0, shape=(), dtype=float32)

tf.Tensor(9.0, shape=(), dtype=float32)

tf.Tensor(0.0, shape=(), dtype=float32)

tf.Tensor(6.0, shape=(), dtype=float32)

tf.Tensor(7.0, shape=(), dtype=float32)
# TFRecord file format
value: "machine learning"

value: "cc150"

value: 15.5

value: 9.5

value: 7.0

value: 8.0

value: 42

feature {

  key: "age"

  value {

    int64_list {

      value: 42

    }

  }

}

feature {

  key: "favorite_book"

  value {

    bytes_list {

      value: "machine learning"

      value: "cc150"

    }

  }

}

feature {

  key: "hours"

  value {

    float_list {

      value: 15.5

      value: 9.5

      value: 7.0

      value: 8.0

    }

  }

}

TF operation

Convert a Python function you have written yourself into a TF function

Advantage: fast

# tf.function and AutoGraph
tf.Tensor(-0.63212055, shape=(), dtype=float32)

tf.Tensor([-0.95021296 -0.917915  ], shape=(2,), dtype=float32)

tf.Tensor(-0.63212055, shape=(), dtype=float32)

tf.Tensor([-0.95021296 -0.917915  ], shape=(2,), dtype=float32)

tf.Tensor(-0.63212055, shape=(), dtype=float32)

tf.Tensor([-0.95021296 -0.917915  ], shape=(2,), dtype=float32)
def tf__scaled_elu(z, scale=None, alpha=None):

  do_return = False

  retval_ = ag__.UndefinedReturnValue()

  with ag__.FunctionScope('scaled_elu', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:

    is_postive = ag__.converted_call(tf.greater_equal, (z, 0.0), None, fscope)

    do_return = True

    retval_ = fscope.mark_return_value(scale * ag__.converted_call(tf.where, (is_postive, z, alpha * ag__.converted_call(tf.nn.elu, (z,), None, fscope)), None, fscope))

  do_return,

  return ag__.retval(retval_)
var = tf.Variable(0.)
tf.Tensor(21.0, shape=(), dtype=float32)
@tf.function()
tf.Tensor([ 1.  8. 27.], shape=(3,), dtype=float32)

tf.Tensor([ 1  8 27], shape=(3,), dtype=int32)
# Add an input_signature to the function to restrict the type of input it accepts
Python inputs incompatible with input_signature:

  inputs: (

    tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))

  input_signature: (

    TensorSpec(shape=(None,), dtype=tf.int32, name='x'))
cube_func_int32 = cube.get_concrete_function(tf.TensorSpec([None],tf.int32, name='x'))

print(cube_func_int32)

print(cube_func_int32 is cube.get_concrete_function(tf.TensorSpec([5], tf.int32, name='x')))

print(cube_func_int32 is cube.get_concrete_function(tf.constant([1,2,3])))

print(cube_func_int32.graph)

print(cube_func_int32.graph.get_operations())
<tensorflow.python.eager.function.ConcreteFunction object at 0x0000021D901E6EF0>

True

True

FuncGraph(name=cube, id=2325995326824)

[<tf.Operation 'x' type=Placeholder>, <tf.Operation 'Pow/y' type=Const>, <tf.Operation 'Pow' type=Pow>, <tf.Operation 'Identity' type=Identity>]
pow_op = cube_func_int32.graph.get_operations()[2]
name: "Pow"

op: "Pow"

input: "x"

input: "Pow/y"

attr {

  key: "T"

  value {

    type: DT_INT32

  }

}

Pow

[<tf.Tensor 'x:0' shape=(None,) dtype=int32>, <tf.Tensor 'Pow/y:0' shape=() dtype=int32>]

[<tf.Tensor 'Pow:0' shape=(None,) dtype=int32>]
#Approximate derivative
7.999999999994678

(8.999999999993236, 47.999999999994714)
x1 = tf.Variable(2.0)
tf.Tensor(9.0, shape=(), dtype=float32)

GradientTape.gradient can only be called once on non-persistent tapes.
# After making the tape persistent, the partial derivatives can be computed twice; the results are 9 and 42, which differ noticeably from the approximate values above
tf.Tensor(9.0, shape=(), dtype=float32)

tf.Tensor(42.0, shape=(), dtype=float32)
#Two partial derivatives are obtained at one time
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
x1 = tf.constant(2.0)

x2 = tf.constant(3.0)

with tf.GradientTape() as tape:

    tape.watch(x1)

    tape.watch(x2)

    z = g(x1,x2)

print(tape.gradient(z, [x1,x2]))
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
x = tf.Variable(5.)

with tf.GradientTape() as tape:

    z1 = 3*x

    z2 = x**2

tape.gradient([z1,z2],x)
<tf.Tensor: shape=(), dtype=float32, numpy=13.0>
#Simulated primary gradient descent
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
#Simulated primary gradient descent
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>

Preparation of data sets

Fashion MNIST is an image data set intended as a replacement for the MNIST handwritten digit data set. It is provided by the research department of Zalando, a German fashion technology company, and contains front-view pictures of 70,000 different products from 10 categories. The image size, data format and training/test split of Fashion MNIST are exactly the same as those of the original MNIST: 60,000 training and 10,000 test samples, each a 28×28 grayscale image. You can therefore use it directly to test the performance of your machine learning and deep learning algorithms without changing any code.

  • 60000 training images and corresponding labels;

  • 10000 test images and corresponding labels;

  • 10 categories;

  • 28×28 resolution per image;

fashion_mnist = keras.datasets.fashion_mnist
x_valid.shape, y_valid.shape: (5000, 28, 28) (5000,)

x_train.shape, y_train.shape (55000, 28, 28) (55000,)

x_test.shape, y_test.shape (10000, 28, 28) (10000,)

Data set sample display

#Data set sample display

Normalize the data

Definition: rescale the data so that, after processing, it lies within a certain range, usually the interval [0, 1] or [-1, 1].

Common normalization methods:

  • Min-max normalization: \frac{x - x_{min}}{x_{max} - x_{min}}

  • Z-score standardization: \frac{x - \mu}{std} (\mu is the mean, std is the standard deviation)

'''
metric = keras.metrics.MeanSquaredError()

print(metric([5.],[2.,1],))

metric.reset_states

print(metric([0.],[1.]))

print(metric.result())
tf.Tensor(12.5, shape=(), dtype=float32)

tf.Tensor(6.75, shape=(), dtype=float32)

tf.Tensor(6.75, shape=(), dtype=float32)

Per batch: iterate over the training set and update the metric

  • Automatic differentiation

At the end of each epoch: compute the metric on the validation set

epochs = 10
1718

Epoch 0 train mse: 8.444045  valid mse: 8.318773

Epoch 1 train mse: 8.288684  valid mse: 8.272807

Epoch 2 train mse: 8.279286  valid mse: 8.326979

Epoch 3 train mse: 8.27741   valid mse: 8.239223

Epoch 4 train mse: 8.273457  valid mse: 8.242294

Epoch 5 train mse: 8.217403  valid mse: 8.25629

Epoch 6 train mse: 8.296442  valid mse: 8.23847

Epoch 7 train mse: 8.26509   valid mse: 8.238471

Epoch 8 train mse: 8.295274  valid mse: 8.272746

Epoch 9 train mse: 8.289257  valid mse: 8.2716055

Construction of model

tf.keras.models.Sequential()

Common model construction:

model = keras.models.Sequential([

Construction of batch normalization plus activation function model:

model.add(keras.layers.Flatten(input_shape=[28,28]))

Add dropout layer. Generally, we only add dropout in the last few layers:

# Construction of a deep neural network model (20 layers)

Construction of wide & deep model:

#Using API to implement wide & deep model

For the construction of multi input wide & deep model:

#The first five features are used as inputs to the wide model

For the construction of wide & deep model with multiple inputs and multiple outputs:

#The first five features are used as inputs to the wide model

Hyperparameters:

Neural networks have many parameters that stay unchanged during the training process

  • Network structure parameters: several layers, width of each layer, activation function of each layer, etc

  • Training parameters: batch_size, learning rate, learning-rate decay algorithm, etc

Trying them out by hand takes a lot of manual effort

Hyper parameter search strategy:

  • Grid search

  • random search

  • Genetic algorithm search

  • heuristic search

Encapsulate the keras model with sklearn:

'''

Two ways to customize layers

#Two ways to customize layers
#Construction of model
The 0th data in the training set: (28, 28)

Hyperparameters

#For the deep & wide model, our training data also needs to be split into the separate inputs that the deep & wide model uses

Data training

Add callback function PG

  • Tensorboard:

    • Metrics summary plots

    • Training graph visualization

    • Activation histograms

    • Sampled profiling

  • EarlyStopping

    • Focus on a certain indicator, such as hyper parameter

      One of the hyperparameters is the number of training epochs: that is, how many times should the data set be traversed completely (one full pass is one epoch)? If the number of epochs is too small, the network may underfit (that is, it does not learn enough from the training data); if the number of epochs is too large, it may overfit (i.e. the network fits the “noise” in the training data rather than the signal).

      Early stopping aims to remove the need to set the number of epochs manually. It can also be regarded as a regularization method that helps the network avoid overfitting (similar to L1 / L2 weight decay and dropout).

      The root cause is that continuing training will lead to a decline in the accuracy of the test set.

      The decline in test accuracy caused by continued training may be due to: 1. overfitting; 2. a learning rate that is too high, which prevents convergence.

Data training for layers

logdir = os.path.join('callbacks')
Train on 55000 samples, validate on 5000 samples

Epoch 1/10

55000/55000 [==============================] - 5s 93us/sample - loss: inf - accuracy: 0.0982 - val_loss: 2.3041 - val_accuracy: 0.0984

Epoch 2/10

55000/55000 [==============================] - 5s 96us/sample - loss: 2.3027 - accuracy: 0.0982 - val_loss: 2.3040 - val_accuracy: 0.0986

Epoch 3/10

55000/55000 [==============================] - 5s 86us/sample - loss: 2.3027 - accuracy: 0.1006 - val_loss: 2.3041 - val_accuracy: 0.0986

Epoch 4/10

55000/55000 [==============================] - 4s 81us/sample - loss: 2.3027 - accuracy: 0.0987 - val_loss: 2.3042 - val_accuracy: 0.0914

Epoch 5/10

55000/55000 [==============================] - 7s 121us/sample - loss: 2.3027 - accuracy: 0.0984 - val_loss: 2.3040 - val_accuracy: 0.0980

Epoch 6/10

55000/55000 [==============================] - 11s 194us/sample - loss: 2.3027 - accuracy: 0.0972 - val_loss: 2.3039 - val_accuracy: 0.0914

Epoch 7/10

55000/55000 [==============================] - 15s 267us/sample - loss: 2.3027 - accuracy: 0.0989 - val_loss: 2.3042 - val_accuracy: 0.0976

Epoch 8/10

55000/55000 [==============================] - 15s 281us/sample - loss: 2.3027 - accuracy: 0.0985 - val_loss: 2.3041 - val_accuracy: 0.0914

Epoch 9/10

55000/55000 [==============================] - 15s 271us/sample - loss: 2.3027 - accuracy: 0.0978 - val_loss: 2.3042 - val_accuracy: 0.0914

Epoch 10/10

55000/55000 [==============================] - 11s 197us/sample - loss: 2.3027 - accuracy: 0.1002 - val_loss: 2.3041 - val_accuracy: 0.0914

{'loss': [inf,

  2.3027012555209074,

  2.302684170844338,

  2.302700057740645,

  2.3026898735739967,

  2.3026884390050713,

  2.3026932554765183,

  2.302697267150879,

  2.3026952650243584,

  2.3026737274516713],

 'accuracy': [0.09821818,

  0.09821818,

  0.10056364,

  0.09872727,

  0.0984,

  0.09716364,

  0.09885454,

  0.098527275,

  0.097763635,

  0.10023636],

 'val_loss': [2.3040645393371584,

  2.3040028648376465,

  2.304090444946289,

  2.304177123260498,

  2.3040085578918457,

  2.303938995361328,

  2.304162268447876,

  2.3041053184509277,

  2.304207458496094,

  2.304115872955322],

 'val_accuracy': [0.0984,

  0.0986,

  0.0986,

  0.0914,

  0.098,

  0.0914,

  0.0976,

  0.0914,

  0.0914,

  0.0914]}

Matplotlib visualization

#Matplotlib visualization
plot_ learning_ cruves(history)

'''
model.evaluate(x_test, y_test)
10000/10000 [==============================] - 2s 198us/sample - loss: 2.3026 - accuracy: 0.1000

[2.3026404083251952, 0.1]

This work is licensed under a CC agreement; reprints must credit the author and include a link to this article.

This article was first published on my blog: Stray_Camel(^U^)ノ~YO

Recommended Today

Microservice design pattern — Overview (notes)

The modes are divided into three groups: Infrastructure related mode: Infrastructure related; Application infrastructure related: application level infrastructure; Application related mode group; Mode decomposition Service splitting mode Decomposition mode according to business capability According to the subdomain decomposition mode Communication related Communication style Service discovery reliability Transactional messages External API Data consistency Solve distributed log […]