Canal monitors MySQL in real time, parses the binlog, and sends the collected data to Kafka

Time: 2020-11-19

The producer must serialize outgoing data into byte arrays before it can be sent to Kafka over the network. For simple types, Kafka ships with built-in serializers (such as the StringEncoder used below).

//Create producer instance
private static Producer<String, String> createProducer() {
    Properties properties = new Properties();

    properties.put("metadata.broker.list", GlobalConfigUtil.kafkaBootstrap);
    properties.put("zookeeper.connect", GlobalConfigUtil.kafkaZookeeper);
    properties.put("serializer.class", StringEncoder.class.getName());

    return new Producer<String, String>(new ProducerConfig(properties));
}
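
This snippet uses the old Scala producer API bundled with kafka_2.11. For reference, the same configuration maps onto the new producer API available since Kafka 0.9 (org.apache.kafka.clients.producer.KafkaProducer); a minimal sketch, reusing the broker list from GlobalConfigUtil:

//Equivalent producer built with the new client API (Kafka 0.9+);
//bootstrap.servers replaces metadata.broker.list and no ZooKeeper address is needed
private static org.apache.kafka.clients.producer.KafkaProducer<String, String> createNewApiProducer() {
    java.util.Properties props = new java.util.Properties();
    props.put("bootstrap.servers", GlobalConfigUtil.kafkaBootstrap);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    return new org.apache.kafka.clients.producer.KafkaProducer<String, String>(props);
}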

In a typical microservice setup, complex data structures need to be passed between services frequently, but Kafka only supports simple types such as strings and integers out of the box. So we exchange JSONObject values between services: JSON converts easily to and from a string, and string serialization and deserialization are already supported.

JSONObject jsonObject = new JSONObject();
jsonObject.put("logFileName", logFileName);
jsonObject.put("logFileOffset", logFileOffset);
jsonObject.put("dbName", dbName);
jsonObject.put("tableName", tableName);
jsonObject.put("eventType", eventType);
jsonObject.put("columnValueList", columnValueList);
jsonObject.put("emptyCount", emptyCount);
jsonObject.put("timestamp", timestamp);

//Assemble all fields parsed from the binlog into a JSON string
String data = JSON.toJSONString(jsonObject);

//Send the parsed data to Kafka
KafkaSender.sendMessage(GlobalConfigUtil.kafkaInput, JSON.toJSONString(key), data);
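
With the fields above, a message published to the topic looks roughly like this (all values are illustrative):

{"logFileName":"mysql-bin.000001","logFileOffset":4,"dbName":"test","tableName":"user","eventType":"INSERT","columnValueList":[{"columnName":"id","columnValue":"1","isValid":true}],"emptyCount":1,"timestamp":1605744000000}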

The ResourceBundle class is used to read the properties resource file: ResourceBundle.getBundle("application") loads application.properties from the classpath. All configuration items are read once during initialization and kept in static member variables, which avoids re-reading the configuration file every time a value is needed; that I/O is slow and easily becomes a performance bottleneck.

//Read application.properties file
private static ResourceBundle resourceBundle = ResourceBundle.getBundle("application");

public static String canalHost = resourceBundle.getString("canal.host");
public static String canalPort = resourceBundle.getString("canal.port");
public static String canalInstance = resourceBundle.getString("canal.instance");
public static String mysqlUsername = resourceBundle.getString("mysql.username");
public static String mysqlPassword = resourceBundle.getString("mysql.password");
public static String kafkaBootstrap = resourceBundle.getString("kafka.bootstrap.servers");
public static String kafkaZookeeper = resourceBundle.getString("kafka.zookeeper.connect");
public static String kafkaInput = resourceBundle.getString("kafka.input.topic");

Complete code

POM file
<dependency>  
    <groupId>com.alibaba.otter</groupId>  
    <artifactId>canal.client</artifactId>  
    <version>1.0.24</version>  
</dependency>  
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->  
<dependency>  
    <groupId>org.apache.kafka</groupId>  
    <artifactId>kafka_2.11</artifactId>  
    <version>0.9.0.1</version>  
    <exclusions>  
        <exclusion>  
            <groupId>org.slf4j</groupId>  
            <artifactId>slf4j-log4j12</artifactId>  
        </exclusion>  
    </exclusions>  
</dependency>  
  
<!-- converts between objects and JSON -->
<dependency>  
    <groupId>com.alibaba</groupId>  
    <artifactId>fastjson</artifactId>  
    <version>1.2.44</version>  
</dependency>
GlobalConfigUtil.java

import java.util.ResourceBundle;
  
/**
 * Utility class for reading the configuration file
 */
public class GlobalConfigUtil {

    //Read the application.properties file
    private static ResourceBundle resourceBundle = ResourceBundle.getBundle("application");
  
    public static String canalHost = resourceBundle.getString("canal.host");
    public static String canalPort = resourceBundle.getString("canal.port");
    public static String canalInstance = resourceBundle.getString("canal.instance");
    public static String mysqlUsername = resourceBundle.getString("mysql.username");
    public static String mysqlPassword = resourceBundle.getString("mysql.password");
    public static String kafkaBootstrap = resourceBundle.getString("kafka.bootstrap.servers");
    public static String kafkaZookeeper = resourceBundle.getString("kafka.zookeeper.connect");
    public static String kafkaInput = resourceBundle.getString("kafka.input.topic");
  
    public static void main(String[] args) {  
        System.out.println(canalHost);  
    }  
}
KafkaSender.java

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;  
import kafka.producer.ProducerConfig;  
import kafka.serializer.StringEncoder;  
  
import java.util.Properties;  
  
/**
 * Utility class for producing messages to Kafka
 */
public class KafkaSender {
    private String topic;

    public KafkaSender(String topic) {
        super();
        this.topic = topic;
    }
  
    /**
     * Send a message to the specified Kafka topic
     *
     * @param topic topic name
     * @param key   message key
     * @param data  message payload
     */
    public static void sendMessage(String topic, String key, String data) {
        Producer<String, String> producer = createProducer();
        producer.send(new KeyedMessage<String, String>(topic, key, data));
    }
  
    /**
     * Create a producer instance
     *
     * @return producer
     */
    private static Producer<String, String> createProducer() {
        Properties properties = new Properties();

        properties.put("metadata.broker.list", GlobalConfigUtil.kafkaBootstrap);
        properties.put("zookeeper.connect", GlobalConfigUtil.kafkaZookeeper);
        properties.put("serializer.class", StringEncoder.class.getName());

        return new Producer<String, String>(new ProducerConfig(properties));
    }
}
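
Usage is a single static call; for example (topic and payload here are illustrative):

KafkaSender.sendMessage("test", UUID.randomUUID().toString(), "{\"hello\":\"world\"}");

Note that createProducer() builds a new producer for every message, which keeps the sample short; for any real throughput, a single producer instance should be created once and reused across calls.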
CanalClient.java

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;  
import com.alibaba.otter.canal.client.CanalConnector;  
import com.alibaba.otter.canal.client.CanalConnectors;  
import com.alibaba.otter.canal.protocol.CanalEntry;  
import com.alibaba.otter.canal.protocol.Message;  
  
import java.net.InetSocketAddress;  
import java.util.ArrayList;  
import java.util.List;  
import java.util.UUID;  
  
/**
 * Utility class that parses binlog entries fetched from Canal
 */
public class CanalClient {
  
    static class ColumnValuePair {  
        private String columnName;  
        private String columnValue;  
        private Boolean isValid;  
  
        public ColumnValuePair(String columnName, String columnValue, Boolean isValid) {  
            this.columnName = columnName;  
            this.columnValue = columnValue;  
            this.isValid = isValid;  
        }  
  
        public String getColumnName() { return columnName; }  
        public void setColumnName(String columnName) { this.columnName = columnName; }  
        public String getColumnValue() { return columnValue; }  
        public void setColumnValue(String columnValue) { this.columnValue = columnValue; }  
        public Boolean getIsValid() { return isValid; }  
        public void setIsValid(Boolean isValid) { this.isValid = isValid; }  
    }  
  
    /**
     * Obtain a canal connection
     *
     * @param host     canal server host name
     * @param port     canal server port number
     * @param instance canal instance name
     * @param username MySQL user name
     * @param password MySQL password
     * @return canal connector
     */
    public static CanalConnector getConn(String host, int port, String instance, String username, String password) {
        CanalConnector canalConnector = CanalConnectors.newSingleConnector(new InetSocketAddress(host, port), instance, username, password);
        return canalConnector;
    }
  
    /**
     * Parse binlog entries
     *
     * @param entries    binlog message entities
     * @param emptyCount sequence number of the operation
     */
    public static void analysis(List<CanalEntry.Entry> entries, int emptyCount) {
        for (CanalEntry.Entry entry : entries) {
            //Skip transaction begin/end markers; only row-change entries are parsed
            if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN ||
                    entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) {
                continue;
            }

            //Parse the binlog entry into a row change
            CanalEntry.RowChange rowChange = null;

            try {
                rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
            } catch (Exception e) {
                e.printStackTrace();
            }
  
            //Get the operation type (insert, delete, update)
            CanalEntry.EventType eventType = rowChange.getEventType();
            //Get the binlog file name
            String logfileName = entry.getHeader().getLogfileName();
            //Get the offset of the current operation within the binlog file
            long logfileOffset = entry.getHeader().getLogfileOffset();
            //Get the database the current operation belongs to
            String dbName = entry.getHeader().getSchemaName();
            //Get the table the current operation belongs to
            String tableName = entry.getHeader().getTableName();
            //Get the execution time of the operation
            long timestamp = entry.getHeader().getExecuteTime();
  
            //Parse the row data of this operation
            for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) {
                //Delete operation
                if (eventType == CanalEntry.EventType.DELETE) {
                    //Take all column data before the delete
                    dataDetails(rowData.getBeforeColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount, timestamp);
                }
                //Insert operation
                else if (eventType == CanalEntry.EventType.INSERT) {
                    //Take all column data after the insert
                    dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount, timestamp);
                }
                //Update operation
                else {
                    //Take all column data after the update
                    dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount, timestamp);
                }
            }
        }  
    }  
  
    /**
     * Build the message for one binlog row and send it to Kafka
     *
     * @param columns       all column data of the current row
     * @param logFileName   binlog file name
     * @param logFileOffset offset of the current operation within the binlog
     * @param dbName        database the current operation belongs to
     * @param tableName     table the current operation belongs to
     * @param eventType     operation type (insert, update, delete)
     * @param emptyCount    sequence number of the operation
     * @param timestamp     execution time of the operation
     */
    private static void dataDetails(List<CanalEntry.Column> columns,
                                    String logFileName,
                                    Long logFileOffset,
                                    String dbName,
                                    String tableName,
                                    CanalEntry.EventType eventType,
                                    int emptyCount,
                                    long timestamp) {
  
        //Collect the columns of the current row and their values
        List<ColumnValuePair> columnValueList = new ArrayList<ColumnValuePair>();
  
        for (CanalEntry.Column column : columns) {  
            ColumnValuePair columnValuePair = new ColumnValuePair(column.getName(), column.getValue(), column.getUpdated());  
            columnValueList.add(columnValuePair);  
        }  
  
        String key = UUID.randomUUID().toString();  
        JSONObject jsonObject = new JSONObject();  
//        jsonObject.put("logFileName", logFileName);  
//        jsonObject.put("logFileOffset", logFileOffset);  
 jsonObject.put("dbName", dbName);  
        jsonObject.put("tableName", tableName);  
        jsonObject.put("eventType", eventType);  
        jsonObject.put("columnValueList", columnValueList);  
//        jsonObject.put("emptyCount", emptyCount);  
//        jsonObject.put("timestamp", timestamp);  
  
  
 //Splicing all fields parsed by binlog  
 String data = JSON.toJSONString(jsonObject);  
  
        System.out.println("【JSON】" + data);  
  
        //The parsed data is sent to Kafka  
 KafkaSender.sendMessage(GlobalConfigUtil.kafkaInput, JSON.toJSONString(key), data);  
    }  
  
    /**
     * Client entry point
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        //Load the configuration
        String host = GlobalConfigUtil.canalHost;
        int port = Integer.parseInt(GlobalConfigUtil.canalPort);
        String instance = GlobalConfigUtil.canalInstance;
        String username = GlobalConfigUtil.mysqlUsername;
        String password = GlobalConfigUtil.mysqlPassword;
  
        //Get the canal connection
        CanalConnector conn = getConn(host, port, instance, username, password);

        //Read data from the binlog
        int batchSize = 100;
        int emptyCount = 1;
  
        try {
            conn.connect();
            //Subscribe to all databases and tables (schema.table regex)
            conn.subscribe(".*\\..*");
            conn.rollback();

            int totalCount = 120; //number of loop iterations

            while (emptyCount < totalCount) {
                //Fetch a batch of entries without auto-acknowledging
                Message message = conn.getWithoutAck(batchSize);

                long id = message.getId();
                int size = message.getEntries().size();
                if (id == -1 || size == 0) {
                    emptyCount = 0;
                    //No data was read
                    System.out.println("No data has been read...");
                } else {
                    //If there is data, parse the binlog entries
                    analysis(message.getEntries(), emptyCount);
                    emptyCount++;
                }
                //Acknowledge the batch
                conn.ack(message.getId());
            }  
        } catch (Exception e) {  
            e.printStackTrace();  
        } finally {  
            conn.disconnect();  
        }  
    }  
}
application.properties (replace the placeholders below with your own environment):
canal.host=xxx.xx.xxx.xxx  
canal.port=11111  
canal.instance=example  
mysql.username=root  
mysql.password=xxxxxx  
kafka.bootstrap.servers=xxx.xx.xxx.xxx:9092
kafka.zookeeper.connect=xxx.xx.xxx.xxx:2182
kafka.input.topic=test
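
To verify that messages are arriving, a simple consumer can be attached to the same topic. A minimal sketch using the new consumer API from the kafka-clients jar that the kafka_2.11 dependency pulls in (bootstrap address and topic are the placeholders from above; the class name is just for illustration):

import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class BinlogTopicChecker {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "xxx.xx.xxx.xxx:9092");
        props.put("group.id", "binlog-checker");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
        consumer.subscribe(Collections.singletonList("test"));

        //Print every binlog JSON message published by CanalClient
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.key() + " -> " + record.value());
            }
        }
    }
}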

For the complete project, see: SimpleMysqlCanalKafkaSample