Time：2022-5-13

# Python machine learning manual — from data preprocessing to deep learning

This book works like a reference manual or dictionary: it shows clearly how specific pieces of Python code are called and in which scenarios they are used. Even though it is a reference book, working through it should give a deeper understanding of the libraries commonly used for machine learning in Python and make you more proficient in applying them.

What follows is hands-on practice based on the code in the book. The comments generally explain what each line of code does (written before that line) and show the output of each print call (written after it). The notes do not strictly follow the book: the order is adjusted slightly to suit how the code actually runs, and some of my own understanding is added.

If you copy it to your own environment and run it again for output, I believe the understanding will be deeper and clearer.

Each code block in this blog post is a complete, self-contained example that can be copied and run directly as a unit.

# 01 – vectors, matrices and arrays

This section mainly covers the basic usage of the NumPy library.
It includes:

1. Initialization and format
2. Data reading and batch processing
3. Numerical calculation
4. Matrix calculation
5. Random numbers

## 01-1 initialization and format

import numpy as np

# Create a row vector (1-D array)
vector = np.array([1, 2, 3])
# Create a column vector (a 3x1 two-dimensional array)
vector = np.array([[1], [2], [3]])
print(vector)
# [[1]
#  [2]
#  [3]]
# Create a matrix
matrix = np.array([[0, 0], [2, 0], [0, 3]])
print(type(matrix))  # <class 'numpy.ndarray'>
# View the number of rows and columns
print(matrix.shape)  # (3, 2)
# View the number of elements
print(matrix.size)  # 6
# View the number of dimensions
print(matrix.ndim)  # 2
# Convert to the np.matrix type.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
matrix = np.asmatrix(matrix)
print(type(matrix))  # <class 'numpy.matrix'>

from scipy import sparse
# Convert to a compressed sparse row (CSR) matrix; only non-zero entries are stored
matrix = sparse.csr_matrix(matrix)
print(matrix)
#   (1, 0)        2
#   (2, 1)        3
# (newer SciPy releases may also print a summary header before the entries)
print(type(matrix))  # e.g. <class 'scipy.sparse._csr.csr_matrix'> (module path varies by SciPy version)

## 01-2 data reading and batch processing

import numpy as np

matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# Output the element in the second row, third column (indices start from 0).
# Note: NumPy takes both indices inside one pair of brackets, unlike a
# nested Python list, which would be indexed as lst[1][2].
print(matrix[1, 2])  # 6

# One-dimensional slice, [start:end]: includes the start position,
# excludes the end position.
# Output all elements before the third element
vector = np.array([1, 2, 3])
print(vector[0:2])
# [1 2]

# A two-dimensional slice works like a one-dimensional one, with the
# dimensions separated by ','.
# '1:' -> the second row and all rows after it
# ':2' -> all columns before the third column
print(matrix[1:, :2])
# [[4 5]
#  [7 8]]

# Apply a function to multiple elements in an array at the same time.
# Create a function, add_100, that returns its input plus 100
add_100 = lambda i: i + 100
# Create the vectorized function (np.vectorize wraps add_100 so that it is
# called once per element)
vectorized_add_100 = np.vectorize(add_100)
# Apply this function to all elements of the matrix
print(vectorized_add_100(matrix))
# [[101 102 103]
#  [104 105 106]
#  [107 108 109]]

## 01-3 numerical calculation

import numpy as np

matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Largest element of the whole matrix
largest = np.max(matrix)
print(largest)  # 9
# Smallest element of the whole matrix
smallest = np.min(matrix)
print(smallest)  # 1
# The axis argument selects the dimension to reduce over:
# axis=0 gives the largest element of each column
column_max = np.max(matrix, axis=0)
print(column_max)  # [7 8 9]
# axis=1 gives the largest element of each row
row_max = np.max(matrix, axis=1)
print(row_max)  # [3 6 9]

# Mean of all elements
mean_value = np.mean(matrix)
print(mean_value)  # 5.0
print(type(mean_value))  # <class 'numpy.float64'>

# Variance of all elements
variance = np.var(matrix)
print(variance)  # 6.666666666666667
print(type(variance))  # <class 'numpy.float64'>

# Standard deviation of all elements
std_dev = np.std(matrix)
print(std_dev)  # 2.581988897471611
print(type(std_dev))  # <class 'numpy.float64'>

# All of the calculations above accept the axis parameter to work per row
# or per column. For example, the mean of each row:
row_means = np.mean(matrix, axis=1)
print(row_means)  # [2. 5. 8.]

## 01-4 matrix calculation

### 01-4-1 matrix deformation and matrix transpose

import numpy as np

matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])

# Reshaping keeps the element values and only changes the shape.
# Here the matrix goes from 4 x 3 to 2 x 6.
print(matrix.shape) # (4, 3)
matrix = matrix.reshape((2, 6))
print(matrix.shape) # (2, 6)
print(matrix)
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]]
# One dimension may be given as -1; NumPy then infers its length from the
# total number of elements (12 / 3 = 4 here).
matrix = matrix.reshape((3, -1))
print(matrix.shape) # (3, 4)
print(matrix)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]

# Note the differences between the following three cases.
# Case 1: reshape to one row -> still two-dimensional, shape (1, 12)
matrix = matrix.reshape((1, -1))
print(matrix.shape) # (1, 12)
print(matrix)
# [[ 1  2  3  4  5  6  7  8  9 10 11 12]]
# Case 2: reshape to a single integer length -> one-dimensional, shape (12,)
matrix = matrix.reshape((3, -1))
print(matrix.shape) # (3, 4)
matrix = matrix.reshape(matrix.size)
print(matrix.shape) # (12,)
print(matrix)
# [ 1  2  3  4  5  6  7  8  9 10 11 12]
# Case 3: flatten the matrix -> one-dimensional, shape (12,).
# flatten() reads the elements in row-major order by default, so the
# result is the same as in case 2.
matrix = matrix.reshape((3, -1))
print(matrix.shape) # (3, 4)
matrix = matrix.flatten()
print(matrix.shape) # (12,)
print(matrix)
# [ 1  2  3  4  5  6  7  8  9 10 11 12]

# Matrix transpose: rows become columns
matrix = matrix.reshape((2, 6))
print(matrix.shape) # (2, 6)
print(matrix)
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]]
matrix = matrix.T
print(matrix.shape) # (6, 2)
print(matrix)
# [[ 1  7]
#  [ 2  8]
#  [ 3  9]
#  [ 4 10]
#  [ 5 11]
#  [ 6 12]]

### 01-4-2 rank, determinant, trace, eigenvalue and eigenvector of matrix

import numpy as np

matrix = np.array([
    [1, 1, 3],
    [2, 2, 6],
    [3, 3, 9],
])
print(matrix)
# [[1 1 3]
#  [2 2 6]
#  [3 3 9]]

# Rank of the matrix, r(A)
rank = np.linalg.matrix_rank(matrix)
print(rank) # 1

# Determinant of the matrix, det(A) or |A|
determinant = np.linalg.det(matrix)
print(determinant) # 0.0

# Elements on the main diagonal of the matrix
main_diagonal = np.diagonal(matrix)
print(main_diagonal) # [1 2 9]
# The offset parameter selects a diagonal above or below the main one.
# Positive offset: one diagonal above
upper_diagonal = np.diagonal(matrix, offset=1)
print(upper_diagonal) # [1 6]
# Negative offset: one diagonal below
lower_diagonal = np.diagonal(matrix, offset=-1)
print(lower_diagonal) # [2 3]

# The trace tr(A) of the matrix is the sum of the diagonal elements
trace = np.trace(matrix)
print(trace) # 12
# The offset parameter works for the trace as well
upper_trace = np.trace(matrix, offset=1)
print(upper_trace) # 7

matrix = np.array([
    [-2, 1, 1],
    [0, 2, 0],
    [-4, 1, 3],
])
# Eigenvalues and eigenvectors of the matrix; column i of eigen_vectors
# belongs to eigen_values[i]
eigen_result = np.linalg.eig(matrix)
eigen_values, eigen_vectors = eigen_result
# Eigenvalues
print(eigen_values)
# [-1.  2.  2.]
# Eigenvectors
print(eigen_vectors)
# [[-0.70710678 -0.24253563  0.30151134]
#  [ 0.          0.          0.90453403]
#  [-0.70710678 -0.9701425   0.30151134]]

### 01-4-3 addition, subtraction, multiplication and inverse of matrix

import numpy as np

mat_1 = np.array([[1, 1, 2], [1, 1, 2], [1, 1, 2]])
mat_2 = np.array([[1, 2, 1], [1, 2, 1], [1, 2, 1]])

# Matrix addition (np.add and the + operator are equivalent)
print(np.add(mat_1, mat_2))
# [[2 3 3]
#  [2 3 3]
#  [2 3 3]]
print(mat_1 + mat_2)
# [[2 3 3]
#  [2 3 3]
#  [2 3 3]]

# Matrix subtraction (np.subtract and the - operator are equivalent)
print(np.subtract(mat_1, mat_2))
# [[ 0 -1  1]
#  [ 0 -1  1]
#  [ 0 -1  1]]
print(mat_1 - mat_2)
# [[ 0 -1  1]
#  [ 0 -1  1]
#  [ 0 -1  1]]

# Matrix multiplication (np.dot and the @ operator give the same result
# for two-dimensional arrays)
print(np.dot(mat_1, mat_2))
# [[4 8 4]
#  [4 8 4]
#  [4 8 4]]
print(mat_1 @ mat_2)
# [[4 8 4]
#  [4 8 4]
#  [4 8 4]]

# Element-wise multiplication of corresponding elements
print(mat_1 * mat_2)
# [[1 2 2]
#  [1 2 2]
#  [1 2 2]]

# Inverse of a matrix, A^-1
mat = np.array([[0, 1], [1, 0]])
print(np.linalg.inv(mat))
# [[0. 1.]
#  [1. 0.]]

# Verification: A @ A^-1 = I
print(mat @ np.linalg.inv(mat))
# [[1. 0.]
#  [0. 1.]]

## 01-5 random number

import numpy as np

# Set the random seed so the results below are reproducible
np.random.seed(0)
# Generate 3 random floats in the range [0.0, 1.0)
rand = np.random.random(size=3)
print(rand) # [0.5488135  0.71518937 0.60276338]

# Generate three random integers between 0 and 10
# (low is inclusive, high=11 is exclusive)
rand = np.random.randint(low=0, high=11, size=3)
print(rand) # [3 7 9]

# Draw three numbers from a normal distribution with mean 0 and
# standard deviation 1
rand = np.random.normal(loc=0, scale=1, size=3)
print(rand) # [-1.42232584  1.52006949 -0.29139398]

# Draw three numbers from a logistic distribution with mean 0 and scale 1
rand = np.random.logistic(loc=0, scale=1, size=3)
print(rand) # [-0.98118713 -0.08939902  1.46416405]

# Draw three numbers that are greater than or equal to 1 and less than 2
rand = np.random.uniform(low=1, high=2, size=3)
print(rand) # [1.47997717 1.3927848  1.83607876]