C++ KNN classification algorithm based on feature vectors

Time:2020-6-28

The k-nearest neighbor (KNN) classification algorithm is a theoretically mature method and one of the simplest machine learning algorithms. Its idea is: if most of the k samples most similar to a given sample in the feature space (i.e., its nearest neighbors) belong to a certain category, then the sample also belongs to that category. For example, with k = 5, if three of a test point's five nearest neighbors are labeled A and two are labeled B, the point is classified as A. In the KNN algorithm, the selected neighbors are all correctly classified objects; the classification decision depends only on the categories of the one or few nearest samples. Although the KNN method depends in principle on limit theorems, the class decision involves only a small number of adjacent samples. Because KNN relies mainly on the limited neighboring samples around a point rather than on discriminating class domains, it is better suited than other methods to sample sets whose class domains overlap heavily.

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <cmath>
 
using namespace std;
 
//Sample structure: a type label plus a feature vector
struct sample
{
 string type;
 vector<double> features;
};
 
//Read training samples from train.txt; each line is a type name followed by a feature vector
void readTrain(vector<sample>& train, const string& file)
{
 ifstream fin(file.c_str()); //file holds the name of the file to read; fin is the input stream
 if(!fin)
 {
 cerr<<"Unable to open the input file: "<<file<<endl;
 exit(1);
 }
 
 string line; 
 double d=0.0;
 while(getline(fin, line)) //getline reads one line from the input stream fin into line
 {
 istringstream stream(line); //bind stream to the line we read
 sample ts;
 stream>>ts.type;
 while(stream>>d) //read one feature value from the line
 {
  ts.features.push_back(d); //append d to the end of ts.features
 }
 train.push_back(ts); //append ts to the end of train
 }
 fin.close();
}
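
For reference, readTrain expects each line of train.txt to be a class label followed by the feature values, for example (these labels and numbers are made up for illustration; the original article does not ship a sample file):

A 1.0 2.0 3.0
A 1.2 1.9 3.1
B 5.0 6.5 7.2
B 4.9 6.6 7.4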
 
//Read test samples from test.txt; each line is a feature vector
void readTest(vector<sample>& test, const string& file)
{
 ifstream fin(file.c_str());
 if(!fin)
 {
 cerr<<"Unable to open the input file: "<<file<<endl;
 exit(1);
 }
 
 string line;
 double d=0.0;
 while(getline(fin,line))
 {
 istringstream stream(line); //bind stream to the line we read
 sample ts;
 while(stream>>d)
 {
  ts.features.push_back(d);
 }
 test.push_back(ts);
 }
 fin.close();
}
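
Correspondingly, test.txt holds one unlabeled feature vector per line, e.g. (again, illustrative values):

1.1 2.1 2.9
4.8 6.4 7.0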
 
//Output the result: write each vector, with its assigned type, to result.txt
void writeResult(const vector<sample>& test, const string& file)
{
 ofstream fout(file.c_str());
 if(!fout)
 {
 cerr<<"Unable to write the input file: "<<endl;
 exit(1);
 }
 
 for(vector<sample>::size_type i=0;i!=test.size();++i)
 {
 fout << test[i].type << '\t';
 for(vector<double>::size_type j=0;j!=test[j].features.size();++j)
 {
  fout<<test[i].features[j]<<' ';
 }
 fout<<endl;
 }
}
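
With the hypothetical files above, writeResult would emit one line per test sample: the predicted type, a tab, then the feature vector, roughly like this:

A	1.1 2.1 2.9
B	4.8 6.4 7.0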
 
//Implementation of KNN algorithm
void knnProcess(vector<sample>& test, const vector<sample>& train, const vector<vector<double> >& dm, unsigned int k)
{
 for (vector<sample>::size_type i = 0; i != test.size(); ++i)
 {
 Multimap < double, string > DTS; // save the k points closest to the test sample I
 
 for (vector<double>::size_type j = 0; j != dm[i].size(); ++j)
 {
   if (dts.size() < k) //insert the first k distances into dts
   {
   dts.insert(make_pair(dm[i][j], train[j].type)); //the multimap keeps its keys sorted in ascending order, so the largest distance ends up last
  }
  else
  {
  multimap<double, string>::iterator it = dts.end();
  --it;
 
   if (dm[i][j] < it->first) //compare the distance between test sample i and training sample j with the largest distance currently in dts; if it is smaller, replace that entry
  {
   dts.erase(it);
   dts.insert(make_pair(dm[i][j], train[j].type));
  }
  }
 }
 map<string, double> tds;
 string type = "";
 double weight = 0.0;
  //The following loop finds the majority (weighted) category among the k sample points nearest to test sample i and uses it as that sample's category
 for (multimap<double, string>::const_iterator cit = dts.begin(); cit != dts.end(); ++cit)
 {
   //Unweighted voting: add 1 for every occurrence among the k neighbors
   // ++tds[cit->second];
 
   //Weighted voting: the greater the distance, the smaller the weight
   tds[cit->second] += 1.0 / cit->first;
  if (tds[cit->second] > weight)
  {
  weight = tds[cit->second];
   type = cit->second; //remember the category with the highest weight so far
  }
 }
 test[i].type = type;
 }
}
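
To see why the 1/distance weighting matters, consider a made-up example (not from the original article): with k = 3, suppose the nearest neighbors lie at distances 0.5 (class A), 1.0 (class B) and 2.0 (class B). Unweighted voting picks B with two votes, but the weighted scores are A = 1/0.5 = 2.0 and B = 1/1.0 + 1/2.0 = 1.5, so the single closer neighbor wins and the sample is labeled A. Note also that if a test point coincides exactly with a training point, cit->first is 0 and 1.0 / cit->first divides by zero; guarding against that case is left as an exercise.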
 
//Calculate Euclidean distance
double euclideanDistance(const vector<double>& v1, const vector<double>& v2)
{
 if(v1.size() != v2.size())
 {
 cerr<<"Unable to get a distance! "<<endl;
 exit(1); //the original fell through here without returning a value, which is undefined behavior
 }
 
 double distance = 0.0;
 
 for (vector<double>::size_type i = 0; i != v1.size(); ++i)
 {
  distance += (v1[i] - v2[i]) * (v1[i] - v2[i]);
 }
 return sqrt(distance);
}
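
For reference, the function above computes the standard Euclidean distance:

d(v1, v2) = sqrt( (v1[0] - v2[0])^2 + (v1[1] - v2[1])^2 + ... + (v1[n-1] - v2[n-1])^2 )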
 
/*Initialize the distance matrix from the training and test samples.
The matrix has one row per test sample and one column per training sample;
each row holds the Euclidean distances from one test sample to every training sample.*/
void initDistanceMatrix(vector<vector<double> >& dm, const vector<sample>& train, const vector<sample>& test)
{
 for (vector<sample>::size_type i = 0; i != test.size(); ++i)
 {
 vector<double> vd;
 for (vector<sample>::size_type j = 0; j != train.size(); ++j)
 {
  vd.push_back(euclideanDistance(test[i].features, train[j].features));
 }
 dm.push_back(vd);
 }
}
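
For example (illustrative sizes), with 2 test samples and 3 training samples dm is a 2 x 3 matrix, and dm[0][2] holds the distance from test sample 0 to training sample 2. Filling the matrix costs O(m * n * d) time for m test samples, n training samples and d features per sample.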
 
//Encapsulation of the whole process: read samples, classify, write results
void xfxKnn(const string& file1, const string& file2, const string& file3, int k)
{
 vector<sample> train,test;
 readTrain(train, file1);
 readTest(test, file2);
 vector< vector<double> > dm;
 initDistanceMatrix(dm, train, test);
 knnProcess(test, train, dm, k);
 writeResult(test, file3);
}
 
//Testing
int main()
{
 xfxKnn("train.txt", "test.txt", "result.txt", 5);
 return 0;
}
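
A note on building: stdafx.h is a Visual Studio precompiled header. If you are compiling with another toolchain (an assumption about your environment), remove that include and, assuming the source is saved as knn.cpp, build with something like:

g++ -o knn knn.cpp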

The above is the whole content of this article. I hope it helps you in your study, and I hope you will continue to support developepaer.
