Implementing handwritten digit recognition in C++

I already went through this algorithm once in Python; that write-up covers the basic theory.

This is a BP (backpropagation) neural network implementation.

First, a simple mat type, accelerated with AVX intrinsics, with memory managed through std::allocator.

//mat.h
#pragma once
#include <immintrin.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <initializer_list>
#include <memory>
#include <random>

// stateless allocator used for every mat buffer
static std::allocator<float> alloc;
struct mat
{
	int size = 0;
	int row = 0;
	int col = 0;

	float* ptr = nullptr;
	

	mat() = default;
	mat(int row, int col, double value){
		this->row = row;
		this->col = col;
		size = row * col;
		ptr = alloc.allocate(size);
		float val = float(value);
		std::fill(ptr, ptr + size, val);
	}

	mat(std::initializer_list<std::initializer_list<float>> lists){
		row = int(lists.size());
		col = int(lists.begin()->size());
		size = row * col;
		ptr = alloc.allocate(size);

		int idx = 0;

		for (auto& list : lists) {
			for (auto& e : list) {
				*(ptr + idx) = e;
				idx++;
			}
		}

	}

	mat(const mat& another){
		row = another.row;
		col = another.col;
		size = another.size;
		ptr = alloc.allocate(size);

		std::copy(another.ptr, another.ptr + size, ptr);
	}
	mat(mat&& another)noexcept{
		row = another.row;
		col = another.col;
		size = another.size;
		ptr = another.ptr;

		another.row = another.col = another.size = 0;
		another.ptr = nullptr;
	}

	mat& operator =(const mat& another){
		if (this == &another)return *this;
		if (ptr != nullptr) {
			alloc.deallocate(ptr, size); // free with the OLD size, before overwriting it
		}
		row = another.row;
		col = another.col;
		size = another.size;
		ptr = alloc.allocate(size);
		std::copy(another.ptr, another.ptr + size, ptr);

		return *this;
	}
	mat& operator =(mat&& another)noexcept{
		if (this == &another)return *this;
		if (ptr != nullptr) {
			alloc.deallocate(ptr, size); // free with the OLD size, before overwriting it
		}
		row = another.row;
		col = another.col;
		size = another.size;
		ptr = another.ptr;

		another.row = another.col = another.size = 0;
		another.ptr = nullptr;
		return *this;
	}

	float& operator ()(int row, int col){
		return ptr[this->col * row + col];
	}

	~mat(){
		if (ptr != nullptr) {
			alloc.deallocate(ptr, size);
		}
	}
	
	void show() {
		printf("\n");
		for (int i = 0; i < row; ++i) {
			for (int j = 0; j < col; ++j) {
				printf("%.4f ", ptr[i * col + j]);
			}
			printf("\n");
		}
	}
};

inline mat rand(int row, int col){
	mat res(row, col, 0);
	std::mt19937 engine(std::random_device{}());
	// weights drawn from N(0, 1/col)
	std::normal_distribution<float> distribution(0.0f, std::sqrt(1.0f / col));

	float* pCur = res.ptr;
	for (int i = 0; i < res.size; ++i) {
		*pCur = distribution(engine);
		pCur++;
	}
	return res;
}

inline mat matMul(const mat& a, const mat& b) {
	if (b.row != a.col) throw("error");
	mat res(a.row, b.col, 0);
	// i-k-j loop order: broadcast a(i,k), then stream rows of b and res
	// with unit stride so the inner loop maps onto 8-wide FMA
	for (int i = 0; i < a.row; ++i) {
		for (int k = 0; k < a.col; ++k) {
			__m256 va = _mm256_set1_ps(a.ptr[i * a.col + k]);
			int j = 0;
			for (; j < b.col - 7; j += 8) {
				__m256 vb = _mm256_loadu_ps(b.ptr + k * b.col + j);
				__m256 vr = _mm256_loadu_ps(res.ptr + i * res.col + j);
				vr = _mm256_fmadd_ps(va, vb, vr);
				_mm256_storeu_ps(res.ptr + i * res.col + j, vr);
				//res(i, j) += a(i, k) * b(k, j);
			}
			for (; j < b.col; ++j) { // scalar tail for the remaining columns
				res.ptr[i * res.col + j] += a.ptr[i * a.col + k] * b.ptr[k * b.col + j];
			}
		}
	}
	return res;
}

inline mat sigmoid(const mat& t){
	mat res(t.row, t.col, 0);
	int i;
	for (i = 0; i < t.size - 7; i += 8) {
		__m256 vecT = _mm256_loadu_ps(t.ptr + i);
		__m256 one = _mm256_set1_ps(1.0f);
		__m256 negVecT = _mm256_sub_ps(_mm256_setzero_ps(), vecT);
		// _mm256_exp_ps is an SVML intrinsic (MSVC/ICC); plain GCC/Clang lack it
		__m256 expNegVecT = _mm256_exp_ps(negVecT);
		__m256 sum = _mm256_add_ps(one, expNegVecT);
		__m256 invSum = _mm256_div_ps(one, sum); // 1 / (1 + exp(-x))

		_mm256_storeu_ps(res.ptr + i, invSum);
	}

	for (; i < t.size; ++i) {
		res.ptr[i] = 1.0f / (1.0f + std::exp(-t.ptr[i]));
	}
	return res;
}

inline mat d_sigmoid(const mat& t){
	// sigmoid derivative at the pre-activation: u * (1 - u), with u = sigmoid(t)
	mat res(t.row, t.col, 0);

	int i;
	for (i = 0; i < t.size - 7; i += 8) {
		__m256 vecT = _mm256_loadu_ps(t.ptr + i);
		__m256 one = _mm256_set1_ps(1.0f);
		__m256 negVecT = _mm256_sub_ps(_mm256_setzero_ps(), vecT);
		__m256 expNegVecT = _mm256_exp_ps(negVecT);
		__m256 sum = _mm256_add_ps(one, expNegVecT);
		__m256 invSum = _mm256_div_ps(one, sum);
		__m256 one_sub_sum = _mm256_sub_ps(one, invSum);
		__m256 targetVal = _mm256_mul_ps(invSum, one_sub_sum);

		_mm256_storeu_ps(res.ptr + i, targetVal);
	}

	for (; i < t.size; ++i) {
		float u = 1.0f / (1.0f + std::exp(-t.ptr[i]));
		res.ptr[i] = u * (1.0f - u);
	}
	return res;
}

inline mat transpose(const mat& t){
	mat res(t.col, t.row, 0);
	const int dt = t.col; // walk a column of t with its row stride
	float* pRes = res.ptr;
	for (int i = 0; i < res.row; ++i) {
		float* pT = t.ptr + i;
		for (int j = 0; j < res.col; ++j) {
			*pRes = *pT;
			pRes++; pT += dt;
		}
	}
	return res;
}

inline mat add(const mat& a, const mat& b){
	if (a.row != b.row || a.col != b.col) throw("error");
	mat res(a.row, a.col, 0);
	__m256 va, vb, vr;
	int i = 0;
	for (; i< a.size - 7; i += 8) {
		va = _mm256_loadu_ps(a.ptr + i);
		vb = _mm256_loadu_ps(b.ptr + i);
		vr = _mm256_add_ps(va, vb);
		_mm256_storeu_ps(res.ptr + i, vr);
	}
	for (; i < a.size; i++) {
		res.ptr[i] = a.ptr[i] + b.ptr[i];
	}
	return res;
}

inline mat sub(const mat& a, const mat& b){
	if (a.row != b.row || a.col != b.col) throw("error");
	mat res(a.row, a.col, 0);

	__m256 va, vb, vr;
	int i = 0;
	for(; i < a.size - 7; i += 8) {
		va = _mm256_loadu_ps(a.ptr + i);
		vb = _mm256_loadu_ps(b.ptr + i); 
		vr = _mm256_sub_ps(va, vb);
		_mm256_storeu_ps(res.ptr + i, vr);
	}
	for (; i < a.size; i++) {
		res.ptr[i] = a.ptr[i] - b.ptr[i];
	}

	return res;
}

inline mat numMul(const mat& t, double data) {
	float val = float(data);
	mat res(t.row, t.col, 0);
	__m256 vecV = _mm256_set1_ps(val);
	int i = 0;
	for (; i < t.size - 7; i += 8) {
		__m256 vecT = _mm256_loadu_ps(t.ptr + i);
		__m256 vecR = _mm256_mul_ps(vecT, vecV);
		_mm256_storeu_ps(res.ptr + i, vecR);
	}
	for (; i < t.size; i++) {
		res.ptr[i] = t.ptr[i] * val;
	}
	return res;
}

inline mat dot(const mat& a, const mat& b) {
	if (a.row != b.row || a.col != b.col) throw("error");
	mat res(a.row, a.col, 0);
	__m256 va, vb, vr;
	int i = 0;
	for (; i < a.size - 7; i += 8) {
		va = _mm256_loadu_ps(a.ptr + i);
		vb = _mm256_loadu_ps(b.ptr + i);
		vr = _mm256_mul_ps(va, vb);
		_mm256_storeu_ps(res.ptr + i, vr);
	}
	for (; i < a.size; ++i) {
		res.ptr[i] = a.ptr[i] * b.ptr[i];
	}
	return res;
}
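
As a quick sanity check of the mat API, here is a hypothetical snippet (illustration only, not part of the project):

//sanity.cpp (hypothetical)
#include "mat.h"

int main() {
	mat a = { {1.0f, 2.0f},
		  {3.0f, 4.0f} };
	mat b = rand(2, 3);            // 2x3 weights, entries ~ N(0, 1/3)
	mat c = sigmoid(matMul(a, b)); // 2x3, every entry in (0, 1)
	c.show();
}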

Next comes reading the CSV data.

//file.h
#pragma once
#include<string>
#include<vector>
#include<fstream>
#include"mat.h"

struct batch 
{
	mat input;
	mat output;
	batch(mat&& in, mat&& out) {
		// named rvalue references are lvalues, so move explicitly
		input = std::move(in);
		output = std::move(out);
	}
};
// inline (C++17) so the header can be shared across translation units
inline std::vector<batch> trainData;
inline std::vector<batch> testData;

inline std::vector<float> split(std::string& line, char delimiter) {
	line += '\n'; // getline strips the newline, so append one as a sentinel to flush the last token
	std::vector<float> tokens;
	std::string token;
	for (size_t i = 0; i < line.size(); ++i) {
		if (line[i] == '\n' || line[i] == delimiter) {
			tokens.emplace_back(std::stof(token));
			token.assign("");
		}
		else {
			token += line[i];
		}
	}
	return tokens;
}

inline std::vector<std::vector<float>> readCSV(const std::string& filename) {
	std::ifstream file(filename);
	std::vector<std::vector<float>> data;
	if (file.is_open()) {
		std::string line;
		while (std::getline(file, line)) {
			data.emplace_back(split(line, ','));
		}
	}
	return data;
}

inline mat to_mat_train(std::vector<float>& data) {
	mat res(1, 784, 0);
	for (int i = 0; i < 784; ++i) {
		res.ptr[i] = (data[i + 1] + 1.0f) / 257.0f;
		// normalize pixels from [0, 255] into (0, 1); data[0] is the label
	}
	return res;
}
inline mat to_mat_test(std::vector<float>& data) {
	// one-hot target row: 0.99 at the label index, 0 everywhere else
	mat res(1, 10, 0);
	int idx = int(data[0]);
	res.ptr[idx] = 0.99f;
	return res;
}

inline void getData(const std::string& filename_train, const std::string& filename_test) {
	printf("start reading data\n");
	std::vector<std::vector<float>> train_data = readCSV(filename_train);
	std::vector<std::vector<float>> test_data = readCSV(filename_test);

	for (auto& e : train_data) {
		trainData.emplace_back(
			batch(to_mat_train(e), 
				to_mat_test(e))
		);
	}
	for (auto& e : test_data) {
		testData.emplace_back(
			batch(to_mat_train(e), 
				to_mat_test(e))
		);
	}
	printf("end reading data\n");
	printf("trainData size %lld\n",train_data.size());
	printf("testData size %lld\n",test_data.size());
}
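
Each CSV row is assumed to hold the label followed by 784 pixel values in [0, 255] (the usual mnist_train.csv layout, hence data[0] being the label and data[i + 1] the pixels). The (p + 1) / 257 mapping keeps every input strictly inside (0, 1): p = 0 gives 1/257 ≈ 0.0039 and p = 255 gives 256/257 ≈ 0.9961. Avoiding exact zeros matters because the weight gradient is proportional to the input, so a pixel that is exactly 0 would never update its weights.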

Last is the algorithm from the earlier write-up, though this version leaves out the bias terms.
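
For reference, this is what each Dense layer below computes per sample (row-vector convention, ⊙ = elementwise product):

	output        = input · net                      // pre-activation
	active_output = sigmoid(output)
	prev_error    = error · transpose(net)           // error handed to the previous layer
	dW            = transpose(input) · (error ⊙ d_sigmoid(output))
	net           = net - lr · dW

Note that the back-propagated error is not multiplied by the sigmoid derivative; the derivative enters only each layer's own weight update.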

//neu.h
#pragma once
#include <vector>
#include "mat.h"

class Dense {
	friend class NeuNet;
private:
	mat input;
	mat output;
	mat active_output;
	mat net;
	mat dW;
	mat error;
public:
	Dense() = default;
	Dense(int inputSize, int outputSize) {
		net = rand(inputSize, outputSize);
	}
	mat forward(mat& inputData){
		input = std::move(inputData);
		output = matMul(input, net);
		active_output = sigmoid(output);
		return active_output;
	}
	//note the sign convention: error = output - target
	mat cacuError(mat& error) {
		this->error = std::move(error);
		mat prev_error = matMul(this->error, transpose(net));
		return prev_error;
	}
	void backward(float lr) {
		dW = matMul(transpose(input), dot(error, d_sigmoid(output)));
		net = sub(net, numMul(dW, lr));
	}
};

class NeuNet {
private:
	mat inputData;
	mat targetData;
	std::vector<Dense> dense;
	float lr;
public:
	void addDense(int inputSize, int outputSize) {
		dense.emplace_back(inputSize, outputSize);
	}
	void addData(mat& inputMat, mat& targetMat) {
		inputData = inputMat;
		targetData = targetMat;
	}
	void setLr(double learningRate) {
		lr = float(learningRate);
	}
	void forward() {
		mat fwd = inputData;
		for (size_t i = 0; i < dense.size(); ++i) {
			fwd = dense[i].forward(fwd);
		}
	}
	void backward() {
		int backIdx = int(dense.size() - 1);
		mat err = sub(dense[backIdx].active_output, targetData);
		for (int i = backIdx; i >= 0; --i) {
			// propagate first so the error uses the pre-update weights, then step
			err = dense[i].cacuError(err);
			dense[i].backward(this->lr);
		}
	}
	mat querry(mat& inputMat) {
		mat fwd = inputMat;
		for (size_t i = 0; i < dense.size(); ++i) {
			fwd = dense[i].forward(fwd);
		}
		return fwd;
	}
};
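
With the 784 → 100 → 10 network built in main below, querry returns a 1×10 row of sigmoid activations; the predicted digit is the index of the largest entry, which is what findMaxIdx extracts.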

Finally, main ties everything together.

//main.cpp
#include <cstdio>
#include <ctime>
#include "neu.h"
#include "file.h"

inline int findMaxIdx(const mat& t) {
	// argmax over the flat buffer
	float maxVal = t.ptr[0];
	int maxIdx = 0;
	for (int i = 1; i < t.size; ++i) {
		if (t.ptr[i] > maxVal) {
			maxVal = t.ptr[i];
			maxIdx = i;
		}
	}
	return maxIdx;
}

int main() {
	NeuNet net;
	net.setLr(0.02);
	net.addDense(784, 100);
	net.addDense(100, 10);

	getData("E:\\dev\\neuralNetwork\\mnist\\mnist_train.csv",
		"E:\\dev\\neuralNetwork\\mnist\\mnist_test.csv");

	clock_t s = clock();
	for (size_t i = 0; i < trainData.size(); i++) {
		net.addData(trainData[i].input, trainData[i].output);
		net.forward();
		net.backward();
		if ((i + 1) % 1000 == 0) {
			printf("batch %5lld/%5lld finished\n", (i + 1),trainData.size());
		}
	}
	clock_t e = clock();
	float meet = 0;
	for (size_t i = 0; i < testData.size(); i++) {
		mat tmp = net.querry(testData[i].input);
		if (findMaxIdx(tmp) == findMaxIdx(testData[i].output)) {
			meet += 1;
		}
	}

	float accuracy = meet / float(testData.size());
	printf("final accuracy %.4f\n", accuracy);
	printf("training had used %d ms", e - s);
}
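
A note on building: the kernels use AVX2/FMA intrinsics, and _mm256_exp_ps is an SVML intrinsic, available out of the box on recent MSVC and ICC but not on plain GCC/Clang (there you would need to supply your own vectorized exp). Assuming a Visual Studio developer prompt, something like:

cl /O2 /std:c++17 /arch:AVX2 main.cpp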